diff --git a/CMakeLists.txt b/CMakeLists.txt index 2443c6aa..e87a0528 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -39,6 +39,7 @@ option(ASTCENC_ISA_AVX2 "Enable astcenc builds for AVX2 SIMD") option(ASTCENC_ISA_SSE41 "Enable astcenc builds for SSE4.1 SIMD") option(ASTCENC_ISA_SSE2 "Enable astcenc builds for SSE2 SIMD") option(ASTCENC_ISA_SVE_256 "Enable astcenc builds for 256-bit SVE SIMD") +option(ASTCENC_ISA_SVE_128 "Enable astcenc builds for 128-bit SVE SIMD") option(ASTCENC_ISA_NEON "Enable astcenc builds for NEON SIMD") option(ASTCENC_ISA_NONE "Enable astcenc builds for no SIMD") option(ASTCENC_ISA_NATIVE "Enable astcenc builds for native SIMD") @@ -87,7 +88,7 @@ endforeach() # Count options which MUST be arm64 set(ASTCENC_ARM64_ISA_COUNT 0) -set(ASTCENC_CONFIGS ${ASTCENC_ISA_NEON} ${ASTCENC_ISA_SVE_256}) +set(ASTCENC_CONFIGS ${ASTCENC_ISA_NEON} ${ASTCENC_ISA_SVE_128} ${ASTCENC_ISA_SVE_256}) foreach(ASTCENC_CONFIG ${ASTCENC_CONFIGS}) if(${ASTCENC_CONFIG}) math(EXPR ASTCENC_ARM64_ISA_COUNT "${ASTCENC_ARM64_ISA_COUNT} + 1") @@ -120,6 +121,7 @@ endif() message(STATUS "Arm backend options") printopt("SVE 256b backend " ${ASTCENC_ISA_SVE_256}) +printopt("SVE 128b backend " ${ASTCENC_ISA_SVE_128}) printopt("NEON backend " ${ASTCENC_ISA_NEON}) message(STATUS "x86-64 backend options") printopt("AVX2 backend " ${ASTCENC_ISA_AVX2}) diff --git a/Docs/ChangeLog-4x.md b/Docs/ChangeLog-4x.md index 8405dcc1..e4f601b0 100644 --- a/Docs/ChangeLog-4x.md +++ b/Docs/ChangeLog-4x.md @@ -18,6 +18,7 @@ The 4.9.0 release is a minor maintenance release. reference implementation. * **Bug fix:** Fixed sincos table index under/overflow. * **Feature:** Added backend for Arm SVE fixed-width 256-bit builds. + * **Feature:** Added backend for Arm SVE fixed-width 128-bit builds. * **Feature:** Optimized NEON mask `any()` and `all()` functions. * **Feature:** Migrated build and test to GitHub Actions pipelines. 
diff --git a/Source/CMakeLists.txt b/Source/CMakeLists.txt index dd04b23f..b19dbe77 100644 --- a/Source/CMakeLists.txt +++ b/Source/CMakeLists.txt @@ -27,8 +27,8 @@ else() set(ASTCENC_CODEC enc) endif() -set(ASTCENC_ARTIFACTS native none sve_256 neon avx2 sse4.1 sse2) -set(ASTCENC_CONFIGS ${ASTCENC_ISA_NATIVE} ${ASTCENC_ISA_NONE} ${ASTCENC_ISA_SVE_256} ${ASTCENC_ISA_NEON} ${ASTCENC_ISA_AVX2} ${ASTCENC_ISA_SSE41} ${ASTCENC_ISA_SSE2}) +set(ASTCENC_ARTIFACTS native none sve_256 sve_128 neon avx2 sse4.1 sse2) +set(ASTCENC_CONFIGS ${ASTCENC_ISA_NATIVE} ${ASTCENC_ISA_NONE} ${ASTCENC_ISA_SVE_256} ${ASTCENC_ISA_SVE_128} ${ASTCENC_ISA_NEON} ${ASTCENC_ISA_AVX2} ${ASTCENC_ISA_SSE41} ${ASTCENC_ISA_SSE2}) list(LENGTH ASTCENC_ARTIFACTS ASTCENC_ARTIFACTS_LEN) math(EXPR ASTCENC_ARTIFACTS_LEN "${ASTCENC_ARTIFACTS_LEN} - 1") @@ -40,6 +40,8 @@ foreach(INDEX RANGE ${ASTCENC_ARTIFACTS_LEN}) if(${ASTCENC_ISA_SIMD} MATCHES "sve_256") # Not suported on macOS + elseif(${ASTCENC_ISA_SIMD} MATCHES "sve_128") + # Not supported on macOS elseif(${ASTCENC_ISA_SIMD} MATCHES "neon") set(CMAKE_OSX_ARCHITECTURES arm64) elseif(${ASTCENC_ISA_SIMD} MATCHES "sse2") diff --git a/Source/UnitTest/CMakeLists.txt b/Source/UnitTest/CMakeLists.txt index dc4bacad..9e413567 100644 --- a/Source/UnitTest/CMakeLists.txt +++ b/Source/UnitTest/CMakeLists.txt @@ -15,8 +15,8 @@ # under the License. 
# ---------------------------------------------------------------------------- -set(ASTCENC_ARTIFACTS native none sve_256 neon avx2 sse4.1 sse2) -set(ASTCENC_CONFIGS ${ASTCENC_ISA_NATIVE} ${ASTCENC_ISA_NONE} ${ASTCENC_ISA_SVE_256} ${ASTCENC_ISA_NEON} ${ASTCENC_ISA_AVX2} ${ASTCENC_ISA_SSE41} ${ASTCENC_ISA_SSE2}) +set(ASTCENC_ARTIFACTS native none sve_256 sve_128 neon avx2 sse4.1 sse2) +set(ASTCENC_CONFIGS ${ASTCENC_ISA_NATIVE} ${ASTCENC_ISA_NONE} ${ASTCENC_ISA_SVE_256} ${ASTCENC_ISA_SVE_128} ${ASTCENC_ISA_NEON} ${ASTCENC_ISA_AVX2} ${ASTCENC_ISA_SSE41} ${ASTCENC_ISA_SSE2}) list(LENGTH ASTCENC_ARTIFACTS ASTCENC_ARTIFACTS_LEN) math(EXPR ASTCENC_ARTIFACTS_LEN "${ASTCENC_ARTIFACTS_LEN} - 1") @@ -28,6 +28,8 @@ foreach(INDEX RANGE ${ASTCENC_ARTIFACTS_LEN}) if(${ASTCENC_ISA_SIMD} MATCHES "sve_256") # Not supported on macOS + elseif(${ASTCENC_ISA_SIMD} MATCHES "sve_128") + # Not supported on macOS elseif(${ASTCENC_ISA_SIMD} MATCHES "neon") set(CMAKE_OSX_ARCHITECTURES arm64) elseif(${ASTCENC_ISA_SIMD} MATCHES "sse2") diff --git a/Source/UnitTest/cmake_core.cmake b/Source/UnitTest/cmake_core.cmake index eeb780f3..1bf9ddad 100644 --- a/Source/UnitTest/cmake_core.cmake +++ b/Source/UnitTest/cmake_core.cmake @@ -104,6 +104,21 @@ elseif(${ASTCENC_ISA_SIMD} MATCHES "sve_256") PRIVATE -march=armv8-a+sve -msve-vector-bits=256) +elseif(${ASTCENC_ISA_SIMD} MATCHES "sve_128") + target_compile_definitions(${ASTCENC_TEST} + PRIVATE + ASTCENC_NEON=1 + ASTCENC_SVE=4 + ASTCENC_SSE=0 + ASTCENC_AVX=0 + ASTCENC_POPCNT=0 + ASTCENC_F16C=0) + + # Enable SVE + target_compile_options(${ASTCENC_TEST} + PRIVATE + -march=armv8-a+sve -msve-vector-bits=128) + elseif(${ASTCENC_ISA_SIMD} MATCHES "sse2") target_compile_definitions(${ASTCENC_TEST} PRIVATE diff --git a/Source/astcenccli_toplevel_help.cpp b/Source/astcenccli_toplevel_help.cpp index b01da6bd..ad8ef3f2 100644 --- a/Source/astcenccli_toplevel_help.cpp +++ b/Source/astcenccli_toplevel_help.cpp @@ -562,6 +562,8 @@ void astcenc_print_header() const 
char* simdtype = "sse2"; #elif (ASTCENC_SVE == 8) const char* simdtype = "sve.256b"; +#elif (ASTCENC_SVE == 4) + const char* simdtype = "sve.128b"; #elif (ASTCENC_NEON == 1) const char* simdtype = "neon"; #else diff --git a/Source/cmake_core.cmake b/Source/cmake_core.cmake index 0ff759ce..1f6a4d73 100644 --- a/Source/cmake_core.cmake +++ b/Source/cmake_core.cmake @@ -325,6 +325,29 @@ macro(astcenc_set_properties ASTCENC_TARGET_NAME ASTCENC_VENEER_TYPE) -march=armv8-a+sve) endif() + elseif(${ASTCENC_ISA_SIMD} MATCHES "sve_128") + target_compile_definitions(${ASTCENC_TARGET_NAME} + PRIVATE + ASTCENC_NEON=1 + ASTCENC_SVE=4 + ASTCENC_SSE=0 + ASTCENC_AVX=0 + ASTCENC_POPCNT=0 + ASTCENC_F16C=0) + + # Enable SVE in the core library + if (NOT ${ASTCENC_VENEER_TYPE}) + target_compile_options(${ASTCENC_TARGET_NAME} + PRIVATE + -march=armv8-a+sve -msve-vector-bits=128) + + # Enable SVE without fixed vector length in the veneer + elseif (${ASTCENC_VENEER_TYPE} EQUAL 2) + target_compile_options(${ASTCENC_TARGET_NAME} + PRIVATE + -march=armv8-a+sve) + endif() + elseif(${ASTCENC_ISA_SIMD} MATCHES "sse2") target_compile_definitions(${ASTCENC_TARGET_NAME} PRIVATE diff --git a/Test/astc_test_image.py b/Test/astc_test_image.py index da1478d7..ffc1ee05 100644 --- a/Test/astc_test_image.py +++ b/Test/astc_test_image.py @@ -306,11 +306,11 @@ def parse_command_line(): "ref-2.5-neon", "ref-2.5-sse2", "ref-2.5-sse4.1", "ref-2.5-avx2", "ref-3.7-neon", "ref-3.7-sse2", "ref-3.7-sse4.1", "ref-3.7-avx2", "ref-4.8-neon", "ref-4.8-sse2", "ref-4.8-sse4.1", "ref-4.8-avx2", - "ref-main-neon", "ref-main-sve_256", "ref-main-sse2", "ref-main-sse4.1", "ref-main-avx2"] + "ref-main-neon", "ref-main-sve_256", "ref-main-sve_128", "ref-main-sse2", "ref-main-sse4.1", "ref-main-avx2"] # All test encoders - testcoders = ["none", "neon", "sve_256", "sse2", "sse4.1", "avx2", "native", "universal"] - testcodersAArch64 = ["neon", "sve_256"] + testcoders = ["none", "neon", "sve_256", "sve_128", "sse2", "sse4.1", 
"avx2", "native", "universal"] + testcodersAArch64 = ["neon", "sve_256", "sve_128"] testcodersX86 = ["sse2", "sse4.1", "avx2"] coders = refcoders + testcoders + ["all-aarch64", "all-x86"]