File tree Expand file tree Collapse file tree 7 files changed +28
-12
lines changed Expand file tree Collapse file tree 7 files changed +28
-12
lines changed Original file line number Diff line number Diff line change @@ -208,10 +208,10 @@ macro(ct2_compile_kernels_for_isa isa flag)
208
208
list (APPEND SOURCES ${CMAKE_CURRENT_BINARY_DIR} /kernels_${isa}.cc )
209
209
endmacro ()
210
210
211
- if (CMAKE_SYSTEM_PROCESSOR MATCHES "(arm64)|(aarch64)"
211
+ if (CMAKE_SYSTEM_PROCESSOR MATCHES "(arm64)|(aarch64)|(armv7-a) "
212
212
OR (APPLE AND CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" ))
213
- add_definitions (-DCT2_ARM64_BUILD )
214
- set (CT2_BUILD_ARCH "arm64 " )
213
+ add_definitions (-DCT2_ARM_BUILD )
214
+ set (CT2_BUILD_ARCH "arm " )
215
215
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(amd64)|(AMD64)" )
216
216
add_definitions (-DCT2_X86_BUILD )
217
217
set (CT2_BUILD_ARCH "x86_64" )
@@ -240,7 +240,7 @@ if(ENABLE_CPU_DISPATCH)
240
240
ct2_compile_kernels_for_isa (avx2 "-mavx2 -mfma" )
241
241
ct2_compile_kernels_for_isa (avx512 "-mavx512f -mavx512cd -mavx512vl -mavx512bw -mavx512dq" )
242
242
endif ()
243
- elseif (CT2_BUILD_ARCH STREQUAL "arm64 " )
243
+ elseif (CT2_BUILD_ARCH STREQUAL "arm " )
244
244
ct2_compile_kernels_for_isa (neon "-DUSE_NEON" )
245
245
endif ()
246
246
endif ()
Original file line number Diff line number Diff line change @@ -42,7 +42,7 @@ namespace ctranslate2 {
42
42
}
43
43
}
44
44
45
- #elif defined(CT2_ARM64_BUILD )
45
+ #elif defined(CT2_ARM_BUILD )
46
46
47
47
namespace ctranslate2 {
48
48
namespace cpu {
Original file line number Diff line number Diff line change @@ -12,7 +12,7 @@ namespace ctranslate2 {
12
12
bool cpu_supports_avx ();
13
13
bool cpu_supports_avx2 ();
14
14
bool cpu_supports_avx512 ();
15
- #elif defined(CT2_ARM64_BUILD )
15
+ #elif defined(CT2_ARM_BUILD )
16
16
bool cpu_supports_neon ();
17
17
#endif
18
18
Original file line number Diff line number Diff line change @@ -32,7 +32,7 @@ namespace ctranslate2 {
32
32
return " AVX2" ;
33
33
case CpuIsa::AVX512:
34
34
return " AVX512" ;
35
- #elif defined(CT2_ARM64_BUILD )
35
+ #elif defined(CT2_ARM_BUILD )
36
36
case CpuIsa::NEON:
37
37
return " NEON" ;
38
38
#endif
@@ -51,7 +51,7 @@ namespace ctranslate2 {
51
51
return try_isa (env_isa, CpuIsa::AVX2, cpu_supports_avx2 ());
52
52
if (env_isa == " AVX" )
53
53
return try_isa (env_isa, CpuIsa::AVX, cpu_supports_avx ());
54
- #elif defined(CT2_ARM64_BUILD )
54
+ #elif defined(CT2_ARM_BUILD )
55
55
if (env_isa == " NEON" )
56
56
return try_isa (env_isa, CpuIsa::NEON, cpu_supports_neon ());
57
57
#endif
@@ -68,7 +68,7 @@ namespace ctranslate2 {
68
68
return CpuIsa::AVX2;
69
69
if (cpu_supports_avx ())
70
70
return CpuIsa::AVX;
71
- # elif defined(CT2_ARM64_BUILD )
71
+ # elif defined(CT2_ARM_BUILD )
72
72
if (cpu_supports_neon ())
73
73
return CpuIsa::NEON;
74
74
# endif
Original file line number Diff line number Diff line change @@ -11,7 +11,7 @@ namespace ctranslate2 {
11
11
AVX,
12
12
AVX2,
13
13
AVX512,
14
- #elif defined(CT2_ARM64_BUILD )
14
+ #elif defined(CT2_ARM_BUILD )
15
15
NEON,
16
16
#endif
17
17
};
@@ -48,7 +48,7 @@ namespace ctranslate2 {
48
48
CPU_ISA_CASE (cpu::CpuIsa::AVX, SINGLE_ARG (STMTS)) \
49
49
CPU_ISA_DEFAULT (cpu::CpuIsa::GENERIC, SINGLE_ARG (STMTS)) \
50
50
}
51
- #elif defined(CT2_ARM64_BUILD )
51
+ #elif defined(CT2_ARM_BUILD )
52
52
# define CPU_ISA_DISPATCH (STMTS ) \
53
53
switch (cpu::get_cpu_isa()) { \
54
54
CPU_ISA_CASE (cpu::CpuIsa::NEON, SINGLE_ARG (STMTS)) \
Original file line number Diff line number Diff line change @@ -144,19 +144,35 @@ namespace ctranslate2 {
144
144
}
145
145
146
146
static inline value_type div (value_type a, value_type b) {
147
+ #ifdef __aarch64__
147
148
return vdivq_f32 (a, b);
149
+ #else
150
+ return a / b;
151
+ #endif
148
152
}
149
153
150
154
static inline value_type mul_add (value_type a, value_type b, value_type c) {
155
+ #ifdef __aarch64__
151
156
return vfmaq_f32 (c, a, b);
157
+ #else
158
+ return a * b + c;
159
+ #endif
152
160
}
153
161
154
162
static inline float reduce_add (value_type a) {
163
+ #ifdef __aarch64__
155
164
return vaddvq_f32 (a);
165
+ #else
166
+ return a[0 ] + a[1 ] + a[2 ] + a[3 ];
167
+ #endif
156
168
}
157
169
158
170
static inline float reduce_max (value_type a) {
171
+ #ifdef __aarch64__
159
172
return vmaxvq_f32 (a);
173
+ #else
174
+ return std::max ({a[0 ], a[1 ], a[2 ], a[3 ]});
175
+ #endif
160
176
}
161
177
162
178
};
Original file line number Diff line number Diff line change @@ -38,7 +38,7 @@ namespace ctranslate2 {
38
38
cpu::cpu_supports_avx (),
39
39
cpu::cpu_supports_avx2 (),
40
40
cpu::cpu_supports_avx512 ());
41
- #elif defined(CT2_ARM64_BUILD )
41
+ #elif defined(CT2_ARM_BUILD )
42
42
spdlog::info (" CPU: {} (NEON={})" ,
43
43
cpu::cpu_vendor (),
44
44
cpu::cpu_supports_neon ());
You can’t perform that action at this time.
0 commit comments