diff --git a/Source/astcenc_averages_and_directions.cpp b/Source/astcenc_averages_and_directions.cpp index 5af58d7d..d1dea24e 100644 --- a/Source/astcenc_averages_and_directions.cpp +++ b/Source/astcenc_averages_and_directions.cpp @@ -68,7 +68,7 @@ static void compute_partition_averages_rgb( { vint texel_partition(pi.partition_of_texel + i); - vmask lane_mask = lane_id < vint(texel_count); + vmask lane_mask = lane_id < vint_from_size(texel_count); lane_id += vint(ASTCENC_SIMD_WIDTH); vmask p0_mask = lane_mask & (texel_partition == vint(0)); @@ -104,7 +104,7 @@ static void compute_partition_averages_rgb( { vint texel_partition(pi.partition_of_texel + i); - vmask lane_mask = lane_id < vint(texel_count); + vmask lane_mask = lane_id < vint_from_size(texel_count); lane_id += vint(ASTCENC_SIMD_WIDTH); vmask p0_mask = lane_mask & (texel_partition == vint(0)); @@ -149,7 +149,7 @@ static void compute_partition_averages_rgb( { vint texel_partition(pi.partition_of_texel + i); - vmask lane_mask = lane_id < vint(texel_count); + vmask lane_mask = lane_id < vint_from_size(texel_count); lane_id += vint(ASTCENC_SIMD_WIDTH); vmask p0_mask = lane_mask & (texel_partition == vint(0)); @@ -239,7 +239,7 @@ static void compute_partition_averages_rgba( { vint texel_partition(pi.partition_of_texel + i); - vmask lane_mask = lane_id < vint(texel_count); + vmask lane_mask = lane_id < vint_from_size(texel_count); lane_id += vint(ASTCENC_SIMD_WIDTH); vmask p0_mask = lane_mask & (texel_partition == vint(0)); @@ -279,7 +279,7 @@ static void compute_partition_averages_rgba( { vint texel_partition(pi.partition_of_texel + i); - vmask lane_mask = lane_id < vint(texel_count); + vmask lane_mask = lane_id < vint_from_size(texel_count); lane_id += vint(ASTCENC_SIMD_WIDTH); vmask p0_mask = lane_mask & (texel_partition == vint(0)); @@ -330,7 +330,7 @@ static void compute_partition_averages_rgba( { vint texel_partition(pi.partition_of_texel + i); - vmask lane_mask = lane_id < vint(texel_count); + vmask 
lane_mask = lane_id < vint_from_size(texel_count); lane_id += vint(ASTCENC_SIMD_WIDTH); vmask p0_mask = lane_mask & (texel_partition == vint(0)); @@ -777,7 +777,7 @@ void compute_error_squared_rgba( vint lane_ids = vint::lane_id(); for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { - vmask mask = lane_ids < vint(texel_count); + vmask mask = lane_ids < vint_from_size(texel_count); const uint8_t* texel_idxs = texel_indexes + i; vfloat data_r = gatherf_byte_inds(blk.data_r, texel_idxs); @@ -891,7 +891,7 @@ void compute_error_squared_rgb( vint lane_ids = vint::lane_id(); for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { - vmask mask = lane_ids < vint(texel_count); + vmask mask = lane_ids < vint_from_size(texel_count); const uint8_t* texel_idxs = texel_indexes + i; vfloat data_r = gatherf_byte_inds(blk.data_r, texel_idxs); diff --git a/Source/astcenc_mathlib.h b/Source/astcenc_mathlib.h index 1d73bf1d..5fd296d6 100644 --- a/Source/astcenc_mathlib.h +++ b/Source/astcenc_mathlib.h @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2024 Arm Limited +// Copyright 2011-2025 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. 
You may obtain a copy @@ -48,7 +48,7 @@ #define ASTCENC_SSE 42 #elif defined(__SSE4_1__) #define ASTCENC_SSE 41 - #elif defined(__SSE2__) + #elif defined(__SSE2__) || (defined(_M_AMD64) && !defined(_M_ARM64EC)) #define ASTCENC_SSE 20 #else #define ASTCENC_SSE 0 @@ -68,7 +68,7 @@ #endif #ifndef ASTCENC_NEON - #if defined(__aarch64__) + #if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) #define ASTCENC_NEON 1 #else #define ASTCENC_NEON 0 diff --git a/Source/astcenc_pick_best_endpoint_format.cpp b/Source/astcenc_pick_best_endpoint_format.cpp index dc0af906..7de2f2db 100644 --- a/Source/astcenc_pick_best_endpoint_format.cpp +++ b/Source/astcenc_pick_best_endpoint_format.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2024 Arm Limited +// Copyright 2011-2025 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. 
You may obtain a copy @@ -1292,9 +1292,12 @@ unsigned int compute_ideal_endpoint_formats( vint vbest_error_index(-1); vfloat vbest_ep_error(ERROR_CALC_DEFAULT); - start_block_mode = round_down_to_simd_multiple_vla(start_block_mode); - vint lane_ids = vint::lane_id() + vint(start_block_mode); - for (unsigned int j = start_block_mode; j < end_block_mode; j += ASTCENC_SIMD_WIDTH) + // TODO: This should use size_t for the inputs of start/end_block_mode + // to avoid some of this type conversion, but that propagates and will + // need a bigger PR to fix + size_t start_mode = round_down_to_simd_multiple_vla(start_block_mode); + vint lane_ids = vint::lane_id() + vint_from_size(start_mode); + for (size_t j = start_mode; j < end_block_mode; j += ASTCENC_SIMD_WIDTH) { vfloat err = vfloat(errors_of_best_combination + j); vmask mask = err < vbest_ep_error; diff --git a/Source/astcenc_vecmathlib.h b/Source/astcenc_vecmathlib.h index 6f891ab3..50a588fc 100644 --- a/Source/astcenc_vecmathlib.h +++ b/Source/astcenc_vecmathlib.h @@ -104,6 +104,7 @@ template<typename T> T gatherf_byte_inds(const float* base, const uint8_t* indic constexpr auto loada = vfloat8::loada; constexpr auto load1 = vfloat8::load1; + constexpr auto vint_from_size = vint8_from_size; #elif ASTCENC_SSE >= 20 // If we have SSE expose 4-wide VLA, and 4-wide fixed width. @@ -123,6 +124,7 @@ template<typename T> T gatherf_byte_inds(const float* base, const uint8_t* indic constexpr auto loada = vfloat4::loada; constexpr auto load1 = vfloat4::load1; + constexpr auto vint_from_size = vint4_from_size; #elif ASTCENC_SVE == 8 // Check the compiler is configured with fixed-length 256-bit SVE. @@ -154,6 +156,7 @@ template<typename T> T gatherf_byte_inds(const float* base, const uint8_t* indic constexpr auto loada = vfloat8::loada; constexpr auto load1 = vfloat8::load1; + constexpr auto vint_from_size = vint8_from_size; #elif ASTCENC_NEON > 0 // If we have NEON expose 4-wide VLA. 
@@ -173,6 +176,7 @@ template<typename T> T gatherf_byte_inds(const float* base, const uint8_t* indic constexpr auto loada = vfloat4::loada; constexpr auto load1 = vfloat4::load1; + constexpr auto vint_from_size = vint4_from_size; #else // If we have nothing expose 4-wide VLA, and 4-wide fixed width. @@ -209,6 +213,7 @@ template<typename T> T gatherf_byte_inds(const float* base, const uint8_t* indic constexpr auto loada = vfloat4::loada; constexpr auto load1 = vfloat4::load1; + constexpr auto vint_from_size = vint4_from_size; #endif /** diff --git a/Source/astcenc_vecmathlib_avx2_8.h b/Source/astcenc_vecmathlib_avx2_8.h index 4c96c7c5..af4ecf78 100644 --- a/Source/astcenc_vecmathlib_avx2_8.h +++ b/Source/astcenc_vecmathlib_avx2_8.h @@ -497,6 +497,15 @@ ASTCENC_SIMD_INLINE int hmax_s(vint8 a) return _mm256_cvtsi256_si32(hmax(a).m); } +/** + * @brief Generate a vint8 from a size_t. + */ + ASTCENC_SIMD_INLINE vint8 vint8_from_size(size_t a) + { + assert(a <= std::numeric_limits<int>::max()); + return vint8(static_cast<int>(a)); + } + /** * @brief Store a vector to a 16B aligned memory address. */ diff --git a/Source/astcenc_vecmathlib_common_4.h b/Source/astcenc_vecmathlib_common_4.h index 152d3fc3..f3a00db3 100644 --- a/Source/astcenc_vecmathlib_common_4.h +++ b/Source/astcenc_vecmathlib_common_4.h @@ -31,6 +31,7 @@ #endif #include <cstdio> +#include <limits> // ============================================================================ // vint4 operators and functions @@ -117,6 +118,15 @@ ASTCENC_SIMD_INLINE int hmin_s(vint4 a) return hmin(a).lane<0>(); } +/** + * @brief Generate a vint4 from a size_t. + */ + ASTCENC_SIMD_INLINE vint4 vint4_from_size(size_t a) + { + assert(a <= std::numeric_limits<int>::max()); + return vint4(static_cast<int>(a)); + } + /** * @brief Return the horizontal maximum of a vector. 
*/ diff --git a/Source/astcenc_vecmathlib_sve_8.h b/Source/astcenc_vecmathlib_sve_8.h index 1e98df02..9f18853f 100644 --- a/Source/astcenc_vecmathlib_sve_8.h +++ b/Source/astcenc_vecmathlib_sve_8.h @@ -485,6 +485,15 @@ ASTCENC_SIMD_INLINE int hmax_s(vint8 a) return svmaxv_s32(svptrue_b32(), a.m); } +/** + * @brief Generate a vint8 from a size_t. + */ + ASTCENC_SIMD_INLINE vint8 vint8_from_size(size_t a) + { + assert(a <= std::numeric_limits<int>::max()); + return vint8(static_cast<int>(a)); + } + /** * @brief Store a vector to a 16B aligned memory address. */ diff --git a/Source/cmake_core.cmake b/Source/cmake_core.cmake index 7b3e47c9..8c633a10 100644 --- a/Source/cmake_core.cmake +++ b/Source/cmake_core.cmake @@ -164,6 +164,7 @@ macro(astcenc_set_properties ASTCENC_TARGET_NAME ASTCENC_VENEER_TYPE) # MSVC compiler defines $<${is_msvc_fe}:/EHsc> + $<${is_msvc_fe}:/WX> $<${is_msvccl}:/wd4324> # G++ and Clang++ compiler defines