Skip to content

Commit 953cd06

Browse files
committed
build: add armv7 support
1 parent 04a64be commit 953cd06

File tree

7 files changed

+28
-12
lines changed

7 files changed

+28
-12
lines changed

CMakeLists.txt

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -229,10 +229,10 @@ macro(ct2_compile_kernels_for_isa isa flag)
229229
list(APPEND SOURCES ${CMAKE_CURRENT_BINARY_DIR}/kernels_${isa}.cc)
230230
endmacro()
231231

232-
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(arm64)|(aarch64)"
232+
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(arm64)|(aarch64)|(armv7-a)"
233233
OR (APPLE AND CMAKE_OSX_ARCHITECTURES STREQUAL "arm64"))
234-
add_definitions(-DCT2_ARM64_BUILD)
235-
set(CT2_BUILD_ARCH "arm64")
234+
add_definitions(-DCT2_ARM_BUILD)
235+
set(CT2_BUILD_ARCH "arm")
236236
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(amd64)|(AMD64)")
237237
add_definitions(-DCT2_X86_BUILD)
238238
set(CT2_BUILD_ARCH "x86_64")
@@ -261,7 +261,7 @@ if(ENABLE_CPU_DISPATCH)
261261
ct2_compile_kernels_for_isa(avx2 "-mavx2 -mfma")
262262
ct2_compile_kernels_for_isa(avx512 "-mavx512f -mavx512cd -mavx512vl -mavx512bw -mavx512dq")
263263
endif()
264-
elseif(CT2_BUILD_ARCH STREQUAL "arm64")
264+
elseif(CT2_BUILD_ARCH STREQUAL "arm")
265265
ct2_compile_kernels_for_isa(neon "-DUSE_NEON")
266266
endif()
267267
endif()

src/cpu/cpu_info.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ namespace ctranslate2 {
4242
}
4343
}
4444

45-
#elif defined(CT2_ARM64_BUILD)
45+
#elif defined(CT2_ARM_BUILD)
4646

4747
namespace ctranslate2 {
4848
namespace cpu {

src/cpu/cpu_info.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ namespace ctranslate2 {
1212
bool cpu_supports_avx();
1313
bool cpu_supports_avx2();
1414
bool cpu_supports_avx512();
15-
#elif defined(CT2_ARM64_BUILD)
15+
#elif defined(CT2_ARM_BUILD)
1616
bool cpu_supports_neon();
1717
#endif
1818

src/cpu/cpu_isa.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ namespace ctranslate2 {
3232
return "AVX2";
3333
case CpuIsa::AVX512:
3434
return "AVX512";
35-
#elif defined(CT2_ARM64_BUILD)
35+
#elif defined(CT2_ARM_BUILD)
3636
case CpuIsa::NEON:
3737
return "NEON";
3838
#endif
@@ -51,7 +51,7 @@ namespace ctranslate2 {
5151
return try_isa(env_isa, CpuIsa::AVX2, cpu_supports_avx2());
5252
if (env_isa == "AVX")
5353
return try_isa(env_isa, CpuIsa::AVX, cpu_supports_avx());
54-
#elif defined(CT2_ARM64_BUILD)
54+
#elif defined(CT2_ARM_BUILD)
5555
if (env_isa == "NEON")
5656
return try_isa(env_isa, CpuIsa::NEON, cpu_supports_neon());
5757
#endif
@@ -68,7 +68,7 @@ namespace ctranslate2 {
6868
return CpuIsa::AVX2;
6969
if (cpu_supports_avx())
7070
return CpuIsa::AVX;
71-
# elif defined(CT2_ARM64_BUILD)
71+
# elif defined(CT2_ARM_BUILD)
7272
if (cpu_supports_neon())
7373
return CpuIsa::NEON;
7474
# endif

src/cpu/cpu_isa.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ namespace ctranslate2 {
1111
AVX,
1212
AVX2,
1313
AVX512,
14-
#elif defined(CT2_ARM64_BUILD)
14+
#elif defined(CT2_ARM_BUILD)
1515
NEON,
1616
#endif
1717
};
@@ -48,7 +48,7 @@ namespace ctranslate2 {
4848
CPU_ISA_CASE(cpu::CpuIsa::AVX, SINGLE_ARG(STMTS)) \
4949
CPU_ISA_DEFAULT(cpu::CpuIsa::GENERIC, SINGLE_ARG(STMTS)) \
5050
}
51-
#elif defined(CT2_ARM64_BUILD)
51+
#elif defined(CT2_ARM_BUILD)
5252
# define CPU_ISA_DISPATCH(STMTS) \
5353
switch (cpu::get_cpu_isa()) { \
5454
CPU_ISA_CASE(cpu::CpuIsa::NEON, SINGLE_ARG(STMTS)) \

src/cpu/vec_neon.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,19 +144,35 @@ namespace ctranslate2 {
144144
}
145145

146146
static inline value_type div(value_type a, value_type b) {
147+
#ifdef __aarch64__
147148
return vdivq_f32(a, b);
149+
#else
150+
return a / b;
151+
#endif
148152
}
149153

150154
static inline value_type mul_add(value_type a, value_type b, value_type c) {
155+
#ifdef __aarch64__
151156
return vfmaq_f32(c, a, b);
157+
#else
158+
return a * b + c;
159+
#endif
152160
}
153161

154162
static inline float reduce_add(value_type a) {
163+
#ifdef __aarch64__
155164
return vaddvq_f32(a);
165+
#else
166+
return a[0] + a[1] + a[2] + a[3];
167+
#endif
156168
}
157169

158170
static inline float reduce_max(value_type a) {
171+
#ifdef __aarch64__
159172
return vmaxvq_f32(a);
173+
#else
174+
return std::max({a[0], a[1], a[2], a[3]});
175+
#endif
160176
}
161177

162178
static inline value_type round(value_type v) {

src/utils.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ namespace ctranslate2 {
3838
cpu::cpu_supports_avx(),
3939
cpu::cpu_supports_avx2(),
4040
cpu::cpu_supports_avx512());
41-
#elif defined(CT2_ARM64_BUILD)
41+
#elif defined(CT2_ARM_BUILD)
4242
spdlog::info("CPU: {} (NEON={})",
4343
cpu::cpu_vendor(),
4444
cpu::cpu_supports_neon());

0 commit comments

Comments
 (0)