diff --git a/Source/astcenc_block_sizes.cpp b/Source/astcenc_block_sizes.cpp index 17e51dfc..399e6930 100644 --- a/Source/astcenc_block_sizes.cpp +++ b/Source/astcenc_block_sizes.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2023 Arm Limited +// Copyright 2011-2025 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -384,12 +384,12 @@ static void init_decimation_info_2d( } // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails - unsigned int texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block); - for (unsigned int i = texels_per_block; i < texels_per_block_simd; i++) + size_t texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block); + for (size_t i = texels_per_block; i < texels_per_block_simd; i++) { di.texel_weight_count[i] = 0; - for (unsigned int j = 0; j < 4; j++) + for (size_t j = 0; j < 4; j++) { di.texel_weight_contribs_float_tr[j][i] = 0; di.texel_weights_tr[j][i] = 0; @@ -402,12 +402,12 @@ static void init_decimation_info_2d( unsigned int last_texel_count_wt = wb.texel_count_of_weight[weights_per_block - 1]; uint8_t last_texel = di.weight_texels_tr[last_texel_count_wt - 1][weights_per_block - 1]; - unsigned int weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block); - for (unsigned int i = weights_per_block; i < weights_per_block_simd; i++) + size_t weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block); + for (size_t i = weights_per_block; i < weights_per_block_simd; i++) { di.weight_texel_count[i] = 0; - for (unsigned int j = 0; j < max_texel_count_of_weight; j++) + for (size_t j = 0; j < max_texel_count_of_weight; j++) { di.weight_texels_tr[j][i] = last_texel; di.weights_texel_contribs_tr[j][i] = 0.0f; @@ -640,12 +640,12 @@ 
static void init_decimation_info_3d( } // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails - unsigned int texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block); - for (unsigned int i = texels_per_block; i < texels_per_block_simd; i++) + size_t texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block); + for (size_t i = texels_per_block; i < texels_per_block_simd; i++) { di.texel_weight_count[i] = 0; - for (unsigned int j = 0; j < 4; j++) + for (size_t j = 0; j < 4; j++) { di.texel_weight_contribs_float_tr[j][i] = 0; di.texel_weights_tr[j][i] = 0; @@ -658,12 +658,12 @@ static void init_decimation_info_3d( int last_texel_count_wt = wb.texel_count_of_weight[weights_per_block - 1]; uint8_t last_texel = di.weight_texels_tr[last_texel_count_wt - 1][weights_per_block - 1]; - unsigned int weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block); - for (unsigned int i = weights_per_block; i < weights_per_block_simd; i++) + size_t weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block); + for (size_t i = weights_per_block; i < weights_per_block_simd; i++) { di.weight_texel_count[i] = 0; - for (int j = 0; j < max_texel_count_of_weight; j++) + for (size_t j = 0; j < max_texel_count_of_weight; j++) { di.weight_texels_tr[j][i] = last_texel; di.weights_texel_contribs_tr[j][i] = 0.0f; diff --git a/Source/astcenc_ideal_endpoints_and_weights.cpp b/Source/astcenc_ideal_endpoints_and_weights.cpp index e22b29f4..bd2e4ba2 100644 --- a/Source/astcenc_ideal_endpoints_and_weights.cpp +++ b/Source/astcenc_ideal_endpoints_and_weights.cpp @@ -195,8 +195,8 @@ static void compute_ideal_colors_and_weights_1_comp( } // Zero initialize any SIMD over-fetch - unsigned int texel_count_simd = round_up_to_simd_multiple_vla(texel_count); - for (unsigned int i = texel_count; i < texel_count_simd; i++) + size_t texel_count_simd = round_up_to_simd_multiple_vla(texel_count); + for (size_t i = texel_count; i < 
texel_count_simd; i++) { ei.weights[i] = 0.0f; ei.weight_error_scale[i] = 0.0f; @@ -333,8 +333,8 @@ static void compute_ideal_colors_and_weights_2_comp( } // Zero initialize any SIMD over-fetch - unsigned int texel_count_simd = round_up_to_simd_multiple_vla(texel_count); - for (unsigned int i = texel_count; i < texel_count_simd; i++) + size_t texel_count_simd = round_up_to_simd_multiple_vla(texel_count); + for (size_t i = texel_count; i < texel_count_simd; i++) { ei.weights[i] = 0.0f; ei.weight_error_scale[i] = 0.0f; @@ -500,8 +500,8 @@ static void compute_ideal_colors_and_weights_3_comp( } // Zero initialize any SIMD over-fetch - unsigned int texel_count_simd = round_up_to_simd_multiple_vla(texel_count); - for (unsigned int i = texel_count; i < texel_count_simd; i++) + size_t texel_count_simd = round_up_to_simd_multiple_vla(texel_count); + for (size_t i = texel_count; i < texel_count_simd; i++) { ei.weights[i] = 0.0f; ei.weight_error_scale[i] = 0.0f; @@ -598,8 +598,8 @@ static void compute_ideal_colors_and_weights_4_comp( } // Zero initialize any SIMD over-fetch - unsigned int texel_count_simd = round_up_to_simd_multiple_vla(texel_count); - for (unsigned int i = texel_count; i < texel_count_simd; i++) + size_t texel_count_simd = round_up_to_simd_multiple_vla(texel_count); + for (size_t i = texel_count; i < texel_count_simd; i++) { ei.weights[i] = 0.0f; ei.weight_error_scale[i] = 0.0f; diff --git a/Source/astcenc_partition_tables.cpp b/Source/astcenc_partition_tables.cpp index cad42384..3a127cce 100644 --- a/Source/astcenc_partition_tables.cpp +++ b/Source/astcenc_partition_tables.cpp @@ -304,9 +304,9 @@ static bool generate_one_partition_info_entry( // Fill loop tail so we can overfetch later for (unsigned int i = 0; i < partition_count; i++) { - int ptex_count = counts[i]; - int ptex_count_simd = round_up_to_simd_multiple_vla(ptex_count); - for (int j = ptex_count; j < ptex_count_simd; j++) + size_t ptex_count = counts[i]; + size_t ptex_count_simd = 
round_up_to_simd_multiple_vla(ptex_count); + for (size_t j = ptex_count; j < ptex_count_simd; j++) { pi.texels_of_partition[i][j] = pi.texels_of_partition[i][ptex_count - 1]; } diff --git a/Source/astcenc_pick_best_endpoint_format.cpp b/Source/astcenc_pick_best_endpoint_format.cpp index bf872a92..dc0af906 100644 --- a/Source/astcenc_pick_best_endpoint_format.cpp +++ b/Source/astcenc_pick_best_endpoint_format.cpp @@ -1135,13 +1135,13 @@ unsigned int compute_ideal_endpoint_formats( vfloat clear_error(ERROR_CALC_DEFAULT); vint clear_quant(0); - unsigned int packed_start_block_mode = round_down_to_simd_multiple_vla(start_block_mode); + size_t packed_start_block_mode = round_down_to_simd_multiple_vla(start_block_mode); storea(clear_error, errors_of_best_combination + packed_start_block_mode); store_nbytes(clear_quant, best_quant_levels + packed_start_block_mode); store_nbytes(clear_quant, best_quant_levels_mod + packed_start_block_mode); // Ensure that last iteration overstep contains data that will never be picked - unsigned int packed_end_block_mode = round_down_to_simd_multiple_vla(end_block_mode - 1); + size_t packed_end_block_mode = round_down_to_simd_multiple_vla(end_block_mode - 1); storea(clear_error, errors_of_best_combination + packed_end_block_mode); store_nbytes(clear_quant, best_quant_levels + packed_end_block_mode); store_nbytes(clear_quant, best_quant_levels_mod + packed_end_block_mode); diff --git a/Source/astcenc_vecmathlib.h b/Source/astcenc_vecmathlib.h index e6ae97cc..6f891ab3 100644 --- a/Source/astcenc_vecmathlib.h +++ b/Source/astcenc_vecmathlib.h @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2019-2024 Arm Limited +// Copyright 2019-2025 Arm Limited // Copyright 2008 Jose Fonseca // // Licensed under the Apache License, Version 2.0 (the "License"); you may not @@ -211,30 +211,6 @@ template T gatherf_byte_inds(const float* base, const uint8_t* 
indic constexpr auto load1 = vfloat4::load1; #endif -/** - * @brief Round a count down to the largest multiple of 8. - * - * @param count The unrounded value. - * - * @return The rounded value. - */ -ASTCENC_SIMD_INLINE unsigned int round_down_to_simd_multiple_8(unsigned int count) -{ - return count & static_cast<unsigned int>(~(8 - 1)); -} - -/** - * @brief Round a count down to the largest multiple of 4. - * - * @param count The unrounded value. - * - * @return The rounded value. - */ -ASTCENC_SIMD_INLINE unsigned int round_down_to_simd_multiple_4(unsigned int count) -{ - return count & static_cast<unsigned int>(~(4 - 1)); -} - /** * @brief Round a count down to the largest multiple of the SIMD width. * @@ -244,9 +220,9 @@ ASTCENC_SIMD_INLINE unsigned int round_down_to_simd_multiple_4(unsigned int coun * * @return The rounded value. */ -ASTCENC_SIMD_INLINE unsigned int round_down_to_simd_multiple_vla(unsigned int count) +ASTCENC_SIMD_INLINE size_t round_down_to_simd_multiple_vla(size_t count) { - return count & static_cast<unsigned int>(~(ASTCENC_SIMD_WIDTH - 1)); + return count & static_cast<size_t>(~(ASTCENC_SIMD_WIDTH - 1)); } /** @@ -258,9 +234,9 @@ ASTCENC_SIMD_INLINE unsigned int round_down_to_simd_multiple_vla(unsigned int co * * @return The rounded value. */ -ASTCENC_SIMD_INLINE unsigned int round_up_to_simd_multiple_vla(unsigned int count) +ASTCENC_SIMD_INLINE size_t round_up_to_simd_multiple_vla(size_t count) { - unsigned int multiples = (count + ASTCENC_SIMD_WIDTH - 1) / ASTCENC_SIMD_WIDTH; + size_t multiples = (count + ASTCENC_SIMD_WIDTH - 1) / ASTCENC_SIMD_WIDTH; return multiples * ASTCENC_SIMD_WIDTH; }