|
17 | 17 | # ------------------------------------------------
|
18 | 18 | AC_DEFUN([MCA_ompi_op_aarch64_CONFIG],[
|
19 | 19 | AC_CONFIG_FILES([ompi/mca/op/aarch64/Makefile])
|
| 20 | + |
20 | 21 | case "${host}" in
|
21 | 22 | aarch64*|arm64*)
|
22 | 23 | op_aarch64_check="yes";;
|
@@ -71,49 +72,95 @@ AC_DEFUN([MCA_ompi_op_aarch64_CONFIG],[
|
71 | 72 | [op_cv_neon_fp_support=yes],
|
72 | 73 | [op_cv_neon_fp_support=no])])])
|
73 | 74 |
|
74 |
| - # |
| 75 | + |
| 76 | + # |
75 | 77 | # Check for SVE support
|
76 | 78 | #
|
77 |
| - AC_CACHE_CHECK([for SVE support], op_cv_sve_support, |
78 |
| - [AS_IF([test "$op_cv_neon_support" = "yes"], |
79 |
| - [ |
80 |
| - AC_LINK_IFELSE( |
81 |
| - [AC_LANG_PROGRAM([[ |
| 79 | + AC_CACHE_CHECK([for SVE support], [op_cv_sve_support], [ |
| 80 | + AC_MSG_RESULT([]) |
| 81 | + # initialize result variables |
| 82 | + op_cv_sve_support=no |
| 83 | + op_cv_sve_add_flags=no |
| 84 | + |
| 85 | + # first attempt: no extra flags |
| 86 | + AC_MSG_CHECKING([for SVE support (no additional flags)]) |
| 87 | + AC_LINK_IFELSE( |
| 88 | + [AC_LANG_SOURCE([[ |
82 | 89 | #if defined(__aarch64__) && defined(__ARM_FEATURE_SVE)
|
83 |
| -#include <arm_sve.h> |
| 90 | + #include <arm_sve.h> |
84 | 91 | #else
|
85 |
| -#error "No support for __aarch64__ or SVE" |
| 92 | + #error "No support for __aarch64__ or SVE" |
86 | 93 | #endif
|
87 |
| - ]], |
88 |
| - [[ |
89 |
| -#if defined(__aarch64__) && defined(_ARM_FEATURE_SVE) |
90 |
| - svfloat32_t vA; |
91 |
| - vA = svdup_n_f32(0) |
| 94 | + |
| 95 | +int main(void) { |
| 96 | + svfloat32_t vA; |
| 97 | + vA = svdup_n_f32(0); |
| 98 | + return 0; |
| 99 | +} |
| 100 | + ]])], |
| 101 | + [ op_cv_sve_support=yes |
| 102 | + AC_MSG_RESULT([yes]) ], |
| 103 | + [ AC_MSG_RESULT([no ]) ] |
| 104 | + ) |
| 105 | + |
| 106 | + # second attempt: use +sve attribute |
| 107 | + AS_IF([test "$op_cv_sve_support" = "no"],[ |
| 108 | + AC_MSG_CHECKING([for SVE support (with +sve)]) |
| 109 | + AC_LINK_IFELSE( |
| 110 | + [AC_LANG_SOURCE([[ |
| 111 | +#if defined(__aarch64__) |
| 112 | + #include <arm_sve.h> |
| 113 | +#else |
| 114 | + #error "not on aarch64" |
92 | 115 | #endif
|
93 |
| - ]])], |
94 |
| - [op_cv_sve_support=yes], |
95 |
| - [op_cv_sve_support=no])])]) |
96 |
| - ]) |
97 | 116 |
|
| 117 | +__attribute__((__target__("+sve"))) |
| 118 | +int main(void) { |
| 119 | + svbool_t pg = svptrue_b32(); |
| 120 | + svuint32_t a = svdup_u32(0); |
| 121 | + svuint32_t b = svdup_u32(0); |
| 122 | + svuint32_t c = svadd_u32_m(pg, a, b); |
| 123 | + return (int)svaddv_u32(pg, c); |
| 124 | +} |
| 125 | + ]])], |
| 126 | + [ op_cv_sve_support=yes |
| 127 | + op_cv_sve_add_flags=yes |
| 128 | + AC_MSG_RESULT([yes]) ], |
| 129 | + [ AC_MSG_RESULT([no ]) ] |
| 130 | + ) |
| 131 | + ]) |
| 132 | + ]) |
| 133 | + |
| 134 | + # restore the language after our C tests |
| 135 | + AC_LANG_POP |
| 136 | +]) |
98 | 137 | AM_CONDITIONAL([MCA_BUILD_ompi_op_has_neon_support],
|
99 | 138 | [test "$op_cv_neon_support" = "yes"])
|
100 | 139 | AM_CONDITIONAL([MCA_BUILD_ompi_op_has_neon_fp_support],
|
101 | 140 | [test "$op_cv_neon_fp_support" = "yes"])
|
102 | 141 | AM_CONDITIONAL([MCA_BUILD_ompi_op_has_sve_support],
|
103 | 142 | [test "$op_cv_sve_support" = "yes"])
|
| 143 | + AM_CONDITIONAL([MCA_BUILD_ompi_op_sve_add_flags], |
| 144 | + [test "$op_cv_sve_add_flags" = "yes"]) |
| 145 | + |
104 | 146 | AC_SUBST(MCA_BUILD_ompi_op_has_neon_support)
|
105 | 147 | AC_SUBST(MCA_BUILD_ompi_op_has_neon_fp_support)
|
106 | 148 | AC_SUBST(MCA_BUILD_ompi_op_has_sve_support)
|
| 149 | + AC_SUBST(MCA_BUILD_ompi_op_sve_add_flags) |
107 | 150 |
|
108 | 151 | AS_IF([test "$op_cv_neon_support" = "yes"],
|
109 | 152 | [AC_DEFINE([OMPI_MCA_OP_HAVE_NEON], [1],[NEON supported in the current build])])
|
110 | 153 | AS_IF([test "$op_cv_neon_fp_support" = "yes"],
|
111 | 154 | [AC_DEFINE([OMPI_MCA_OP_HAVE_NEON_FP], [1],[NEON FP supported in the current build])])
|
112 | 155 | AS_IF([test "$op_cv_sve_support" = "yes"],
|
113 | 156 | [AC_DEFINE([OMPI_MCA_OP_HAVE_SVE], [1],[SVE supported in the current build])])
|
| 157 | + AS_IF([test "$op_cv_sve_add_flags" = "yes"], |
| 158 | + [AC_DEFINE([OMPI_MCA_OP_SVE_EXTRA_FLAGS], [1],[SVE supported with additional compile attributes])]) |
114 | 159 |
|
115 |
| - # If we have at least support for Neon |
116 |
| - AS_IF([test "$op_cv_neon_support" = "yes"], |
| 160 | + |
| 161 | + # If we have at least support for Neon or SVE |
| 162 | + AS_IF([test "$op_cv_neon_support" = "yes" || test "$op_cv_sve_support" = "yes" ], |
117 | 163 | [$1],
|
118 | 164 | [$2])
|
| 165 | + |
119 | 166 | ])dnl
|
0 commit comments