From 8bf7f4830650b679d5ba9cfb7aaa3c835438d2f4 Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Thu, 5 Sep 2024 14:19:43 +0000 Subject: [PATCH 1/5] Move SVE length check to dedicated veneer --- Source/astcenccli_entry2.cpp | 2 +- Source/cmake_core.cmake | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Source/astcenccli_entry2.cpp b/Source/astcenccli_entry2.cpp index 60fe6d4b..5bb1e296 100644 --- a/Source/astcenccli_entry2.cpp +++ b/Source/astcenccli_entry2.cpp @@ -62,7 +62,7 @@ int astcenc_main_veneer( if (svcntw() != ASTCENC_SVE) { int bits = ASTCENC_SVE * 32; - print_error("ERROR: Host SVE support is not a %u-bit implementation\n", bits); + print_error("ERROR: Host SVE support is not a %u bit implementation\n", bits); return 1; } #endif diff --git a/Source/cmake_core.cmake b/Source/cmake_core.cmake index 0ff759ce..0d500e2a 100644 --- a/Source/cmake_core.cmake +++ b/Source/cmake_core.cmake @@ -353,7 +353,7 @@ macro(astcenc_set_properties ASTCENC_TARGET_NAME ASTCENC_VENEER_TYPE) ASTCENC_POPCNT=1 ASTCENC_F16C=0) - if (${ASTCENC_VENEER_TYPE} GREATER 0) + if (NOT ${ASTCENC_VENEER_TYPE}) # Force SSE2 on AppleClang (normally SSE4.1 is the default) target_compile_options(${ASTCENC_TARGET_NAME} PRIVATE @@ -378,7 +378,7 @@ macro(astcenc_set_properties ASTCENC_TARGET_NAME ASTCENC_VENEER_TYPE) ASTCENC_POPCNT=1 ASTCENC_F16C=1) - if (${ASTCENC_VENEER_TYPE} GREATER 0) + if (NOT ${ASTCENC_VENEER_TYPE}) # Force SSE2 on AppleClang (normally SSE4.1 is the default) target_compile_options(${ASTCENC_TARGET_NAME} PRIVATE From ff0d0ae79c1431895cb74494e8197e96f1987d9d Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Thu, 5 Sep 2024 14:21:21 +0000 Subject: [PATCH 2/5] Tweak error message --- Source/astcenccli_entry2.cpp | 2 +- Source/cmake_core.cmake | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Source/astcenccli_entry2.cpp b/Source/astcenccli_entry2.cpp index 5bb1e296..60fe6d4b 100644 --- a/Source/astcenccli_entry2.cpp +++ 
b/Source/astcenccli_entry2.cpp @@ -62,7 +62,7 @@ int astcenc_main_veneer( if (svcntw() != ASTCENC_SVE) { int bits = ASTCENC_SVE * 32; - print_error("ERROR: Host SVE support is not a %u bit implementation\n", bits); + print_error("ERROR: Host SVE support is not a %u-bit implementation\n", bits); return 1; } #endif diff --git a/Source/cmake_core.cmake b/Source/cmake_core.cmake index 0d500e2a..0ff759ce 100644 --- a/Source/cmake_core.cmake +++ b/Source/cmake_core.cmake @@ -353,7 +353,7 @@ macro(astcenc_set_properties ASTCENC_TARGET_NAME ASTCENC_VENEER_TYPE) ASTCENC_POPCNT=1 ASTCENC_F16C=0) - if (NOT ${ASTCENC_VENEER_TYPE}) + if (${ASTCENC_VENEER_TYPE} GREATER 0) # Force SSE2 on AppleClang (normally SSE4.1 is the default) target_compile_options(${ASTCENC_TARGET_NAME} PRIVATE @@ -378,7 +378,7 @@ macro(astcenc_set_properties ASTCENC_TARGET_NAME ASTCENC_VENEER_TYPE) ASTCENC_POPCNT=1 ASTCENC_F16C=1) - if (NOT ${ASTCENC_VENEER_TYPE}) + if (${ASTCENC_VENEER_TYPE} GREATER 0) # Force SSE2 on AppleClang (normally SSE4.1 is the default) target_compile_options(${ASTCENC_TARGET_NAME} PRIVATE From 9eb34bc22a883658326af0b2e5cc5e3dcce129b5 Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Thu, 5 Sep 2024 14:32:12 +0000 Subject: [PATCH 3/5] Add sve_128 support --- CMakeLists.txt | 4 +++- Source/CMakeLists.txt | 6 ++++-- Source/UnitTest/CMakeLists.txt | 6 ++++-- Source/UnitTest/cmake_core.cmake | 15 +++++++++++++++ Source/cmake_core.cmake | 23 +++++++++++++++++++++++ Test/astc_test_image.py | 6 +++--- 6 files changed, 52 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2443c6aa..e87a0528 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -39,6 +39,7 @@ option(ASTCENC_ISA_AVX2 "Enable astcenc builds for AVX2 SIMD") option(ASTCENC_ISA_SSE41 "Enable astcenc builds for SSE4.1 SIMD") option(ASTCENC_ISA_SSE2 "Enable astcenc builds for SSE2 SIMD") option(ASTCENC_ISA_SVE_256 "Enable astcenc builds for 256-bit SVE SIMD") +option(ASTCENC_ISA_SVE_128 "Enable 
astcenc builds for 128-bit SVE SIMD") option(ASTCENC_ISA_NEON "Enable astcenc builds for NEON SIMD") option(ASTCENC_ISA_NONE "Enable astcenc builds for no SIMD") option(ASTCENC_ISA_NATIVE "Enable astcenc builds for native SIMD") @@ -87,7 +88,7 @@ endforeach() # Count options which MUST be arm64 set(ASTCENC_ARM64_ISA_COUNT 0) -set(ASTCENC_CONFIGS ${ASTCENC_ISA_NEON} ${ASTCENC_ISA_SVE_256}) +set(ASTCENC_CONFIGS ${ASTCENC_ISA_NEON} ${ASTCENC_ISA_SVE_128} ${ASTCENC_ISA_SVE_256}) foreach(ASTCENC_CONFIG ${ASTCENC_CONFIGS}) if(${ASTCENC_CONFIG}) math(EXPR ASTCENC_ARM64_ISA_COUNT "${ASTCENC_ARM64_ISA_COUNT} + 1") @@ -120,6 +121,7 @@ endif() message(STATUS "Arm backend options") printopt("SVE 256b backend " ${ASTCENC_ISA_SVE_256}) +printopt("SVE 128b backend " ${ASTCENC_ISA_SVE_128}) printopt("NEON backend " ${ASTCENC_ISA_NEON}) message(STATUS "x86-64 backend options") printopt("AVX2 backend " ${ASTCENC_ISA_AVX2}) diff --git a/Source/CMakeLists.txt b/Source/CMakeLists.txt index dd04b23f..b19dbe77 100644 --- a/Source/CMakeLists.txt +++ b/Source/CMakeLists.txt @@ -27,8 +27,8 @@ else() set(ASTCENC_CODEC enc) endif() -set(ASTCENC_ARTIFACTS native none sve_256 neon avx2 sse4.1 sse2) -set(ASTCENC_CONFIGS ${ASTCENC_ISA_NATIVE} ${ASTCENC_ISA_NONE} ${ASTCENC_ISA_SVE_256} ${ASTCENC_ISA_NEON} ${ASTCENC_ISA_AVX2} ${ASTCENC_ISA_SSE41} ${ASTCENC_ISA_SSE2}) +set(ASTCENC_ARTIFACTS native none sve_256 sve_128 neon avx2 sse4.1 sse2) +set(ASTCENC_CONFIGS ${ASTCENC_ISA_NATIVE} ${ASTCENC_ISA_NONE} ${ASTCENC_ISA_SVE_256} ${ASTCENC_ISA_SVE_128} ${ASTCENC_ISA_NEON} ${ASTCENC_ISA_AVX2} ${ASTCENC_ISA_SSE41} ${ASTCENC_ISA_SSE2}) list(LENGTH ASTCENC_ARTIFACTS ASTCENC_ARTIFACTS_LEN) math(EXPR ASTCENC_ARTIFACTS_LEN "${ASTCENC_ARTIFACTS_LEN} - 1") @@ -40,6 +40,8 @@ foreach(INDEX RANGE ${ASTCENC_ARTIFACTS_LEN}) if(${ASTCENC_ISA_SIMD} MATCHES "sve_256") # Not suported on macOS + elseif(${ASTCENC_ISA_SIMD} MATCHES "sve_128") + # Not supported on macOS elseif(${ASTCENC_ISA_SIMD} MATCHES "neon") 
set(CMAKE_OSX_ARCHITECTURES arm64) elseif(${ASTCENC_ISA_SIMD} MATCHES "sse2") diff --git a/Source/UnitTest/CMakeLists.txt b/Source/UnitTest/CMakeLists.txt index dc4bacad..9e413567 100644 --- a/Source/UnitTest/CMakeLists.txt +++ b/Source/UnitTest/CMakeLists.txt @@ -15,8 +15,8 @@ # under the License. # ---------------------------------------------------------------------------- -set(ASTCENC_ARTIFACTS native none sve_256 neon avx2 sse4.1 sse2) -set(ASTCENC_CONFIGS ${ASTCENC_ISA_NATIVE} ${ASTCENC_ISA_NONE} ${ASTCENC_ISA_SVE_256} ${ASTCENC_ISA_NEON} ${ASTCENC_ISA_AVX2} ${ASTCENC_ISA_SSE41} ${ASTCENC_ISA_SSE2}) +set(ASTCENC_ARTIFACTS native none sve_256 sve_128 neon avx2 sse4.1 sse2) +set(ASTCENC_CONFIGS ${ASTCENC_ISA_NATIVE} ${ASTCENC_ISA_NONE} ${ASTCENC_ISA_SVE_256} ${ASTCENC_ISA_SVE_128} ${ASTCENC_ISA_NEON} ${ASTCENC_ISA_AVX2} ${ASTCENC_ISA_SSE41} ${ASTCENC_ISA_SSE2}) list(LENGTH ASTCENC_ARTIFACTS ASTCENC_ARTIFACTS_LEN) math(EXPR ASTCENC_ARTIFACTS_LEN "${ASTCENC_ARTIFACTS_LEN} - 1") @@ -28,6 +28,8 @@ foreach(INDEX RANGE ${ASTCENC_ARTIFACTS_LEN}) if(${ASTCENC_ISA_SIMD} MATCHES "sve_256") # Not supported on macOS + elseif(${ASTCENC_ISA_SIMD} MATCHES "sve_128") + # Not supported on macOS elseif(${ASTCENC_ISA_SIMD} MATCHES "neon") set(CMAKE_OSX_ARCHITECTURES arm64) elseif(${ASTCENC_ISA_SIMD} MATCHES "sse2") diff --git a/Source/UnitTest/cmake_core.cmake b/Source/UnitTest/cmake_core.cmake index eeb780f3..1bf9ddad 100644 --- a/Source/UnitTest/cmake_core.cmake +++ b/Source/UnitTest/cmake_core.cmake @@ -104,6 +104,21 @@ elseif(${ASTCENC_ISA_SIMD} MATCHES "sve_256") PRIVATE -march=armv8-a+sve -msve-vector-bits=256) +elseif(${ASTCENC_ISA_SIMD} MATCHES "sve_128") + target_compile_definitions(${ASTCENC_TEST} + PRIVATE + ASTCENC_NEON=1 + ASTCENC_SVE=4 + ASTCENC_SSE=0 + ASTCENC_AVX=0 + ASTCENC_POPCNT=0 + ASTCENC_F16C=0) + + # Enable SVE + target_compile_options(${ASTCENC_TEST} + PRIVATE + -march=armv8-a+sve -msve-vector-bits=128) + elseif(${ASTCENC_ISA_SIMD} MATCHES "sse2") 
target_compile_definitions(${ASTCENC_TEST} PRIVATE diff --git a/Source/cmake_core.cmake b/Source/cmake_core.cmake index 0ff759ce..1f6a4d73 100644 --- a/Source/cmake_core.cmake +++ b/Source/cmake_core.cmake @@ -325,6 +325,29 @@ macro(astcenc_set_properties ASTCENC_TARGET_NAME ASTCENC_VENEER_TYPE) -march=armv8-a+sve) endif() + elseif(${ASTCENC_ISA_SIMD} MATCHES "sve_128") + target_compile_definitions(${ASTCENC_TARGET_NAME} + PRIVATE + ASTCENC_NEON=1 + ASTCENC_SVE=4 + ASTCENC_SSE=0 + ASTCENC_AVX=0 + ASTCENC_POPCNT=0 + ASTCENC_F16C=0) + + # Enable SVE in the core library + if (NOT ${ASTCENC_VENEER_TYPE}) + target_compile_options(${ASTCENC_TARGET_NAME} + PRIVATE + -march=armv8-a+sve -msve-vector-bits=128) + + # Enable SVE without fixed vector length in the veneer + elseif (${ASTCENC_VENEER_TYPE} EQUAL 2) + target_compile_options(${ASTCENC_TARGET_NAME} + PRIVATE + -march=armv8-a+sve) + endif() + elseif(${ASTCENC_ISA_SIMD} MATCHES "sse2") target_compile_definitions(${ASTCENC_TARGET_NAME} PRIVATE diff --git a/Test/astc_test_image.py b/Test/astc_test_image.py index da1478d7..ffc1ee05 100644 --- a/Test/astc_test_image.py +++ b/Test/astc_test_image.py @@ -306,11 +306,11 @@ def parse_command_line(): "ref-2.5-neon", "ref-2.5-sse2", "ref-2.5-sse4.1", "ref-2.5-avx2", "ref-3.7-neon", "ref-3.7-sse2", "ref-3.7-sse4.1", "ref-3.7-avx2", "ref-4.8-neon", "ref-4.8-sse2", "ref-4.8-sse4.1", "ref-4.8-avx2", - "ref-main-neon", "ref-main-sve_256", "ref-main-sse2", "ref-main-sse4.1", "ref-main-avx2"] + "ref-main-neon", "ref-main-sve_256", "ref-main-sve_128", "ref-main-sse2", "ref-main-sse4.1", "ref-main-avx2"] # All test encoders - testcoders = ["none", "neon", "sve_256", "sse2", "sse4.1", "avx2", "native", "universal"] - testcodersAArch64 = ["neon", "sve_256"] + testcoders = ["none", "neon", "sve_256", "sve_128", "sse2", "sse4.1", "avx2", "native", "universal"] + testcodersAArch64 = ["neon", "sve_256", "sve_128"] testcodersX86 = ["sse2", "sse4.1", "avx2"] coders = refcoders + testcoders + 
["all-aarch64", "all-x86"] From 458604e8dd701ffd75c07f402a8678e082db00c2 Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Thu, 5 Sep 2024 15:15:58 +0000 Subject: [PATCH 4/5] Add to changelog --- Docs/ChangeLog-4x.md | 1 + 1 file changed, 1 insertion(+) diff --git a/Docs/ChangeLog-4x.md b/Docs/ChangeLog-4x.md index 8405dcc1..e4f601b0 100644 --- a/Docs/ChangeLog-4x.md +++ b/Docs/ChangeLog-4x.md @@ -18,6 +18,7 @@ The 4.9.0 release is a minor maintenance release. reference implementation. * **Bug fix:** Fixed sincos table index under/overflow. * **Feature:** Added backend for Arm SVE fixed-width 256-bit builds. + * **Feature:** Added backend for Arm SVE fixed-width 128-bit builds. * **Feature:** Optimized NEON mask `any()` and `all()` functions. * **Feature:** Migrated build and test to GitHub Actions pipelines. From 46c19f9d2922033a3cfe1ab6bfca5f3c3c3727f2 Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Tue, 10 Sep 2024 23:16:00 +0100 Subject: [PATCH 5/5] Update header build print --- Source/astcenccli_toplevel_help.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Source/astcenccli_toplevel_help.cpp b/Source/astcenccli_toplevel_help.cpp index b01da6bd..ad8ef3f2 100644 --- a/Source/astcenccli_toplevel_help.cpp +++ b/Source/astcenccli_toplevel_help.cpp @@ -562,6 +562,8 @@ void astcenc_print_header() const char* simdtype = "sse2"; #elif (ASTCENC_SVE == 8) const char* simdtype = "sve.256b"; +#elif (ASTCENC_SVE == 4) + const char* simdtype = "sve.128b"; #elif (ASTCENC_NEON == 1) const char* simdtype = "neon"; #else