From 1fd739a422c992d5c51a22aba9b98e0823373efc Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Thu, 13 Mar 2025 20:20:58 +0000 Subject: [PATCH 1/3] Typo fix --- Source/astcenc_find_best_partitioning.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/astcenc_find_best_partitioning.cpp b/Source/astcenc_find_best_partitioning.cpp index f2e43282..20258896 100644 --- a/Source/astcenc_find_best_partitioning.cpp +++ b/Source/astcenc_find_best_partitioning.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2024 Arm Limited +// Copyright 2011-2025 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -226,7 +226,7 @@ static void kmeans_update( uint8_t partition_texel_count[BLOCK_MAX_PARTITIONS] { 0 }; - // Find the center-of-gravity in each cluster + // Find the center of gravity in each cluster for (unsigned int i = 0; i < texel_count; i++) { uint8_t partition = partition_of_texel[i]; From 849f7e6eee323d28d4559ed2c943f556991825be Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Thu, 13 Mar 2025 20:21:20 +0000 Subject: [PATCH 2/3] Readability fix --- Source/astcenc_ideal_endpoints_and_weights.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/astcenc_ideal_endpoints_and_weights.cpp b/Source/astcenc_ideal_endpoints_and_weights.cpp index 8e6ee2f4..e22b29f4 100644 --- a/Source/astcenc_ideal_endpoints_and_weights.cpp +++ b/Source/astcenc_ideal_endpoints_and_weights.cpp @@ -867,7 +867,6 @@ void compute_ideal_weights_for_decimation( } // Otherwise compute an estimate and perform single refinement iteration - ASTCENC_ALIGNAS float infilled_weights[BLOCK_MAX_TEXELS]; // Compute an initial average for each decimated weight bool constant_wes = ei.is_constant_weight_error_scale; @@ -908,6 +907,7 @@ void compute_ideal_weights_for_decimation( // Populate the interpolated weight grid based on the initial average // Process SIMD-width texel coordinates at at time while we can. Safe to // over-process full SIMD vectors - the tail is zeroed. + ASTCENC_ALIGNAS float infilled_weights[BLOCK_MAX_TEXELS]; if (di.max_texel_weight_count <= 2) { for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) From b879ef38db81645cda4cb57f86f2e091a7b1fefb Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Thu, 13 Mar 2025 20:21:55 +0000 Subject: [PATCH 3/3] Use size_t loops in astcenc_averages_and_directions.cpp --- Source/astcenc_averages_and_directions.cpp | 68 +++++++++++----------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/Source/astcenc_averages_and_directions.cpp b/Source/astcenc_averages_and_directions.cpp index 8e2f8d8c..5af58d7d 100644 --- a/Source/astcenc_averages_and_directions.cpp +++ b/Source/astcenc_averages_and_directions.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2023 Arm Limited +// Copyright 2011-2025 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -50,7 +50,7 @@ static void compute_partition_averages_rgb( vfloat4 averages[BLOCK_MAX_PARTITIONS] ) { unsigned int partition_count = pi.partition_count; - unsigned int texel_count = blk.texel_count; + size_t texel_count = blk.texel_count; promise(texel_count > 0); // For 1 partition just use the precomputed mean @@ -64,7 +64,7 @@ static void compute_partition_averages_rgb( vfloatacc pp_avg_rgb[3] {}; vint lane_id = vint::lane_id(); - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { vint texel_partition(pi.partition_of_texel + i); @@ -100,7 +100,7 @@ static void compute_partition_averages_rgb( vfloatacc pp_avg_rgb[2][3] {}; vint lane_id = vint::lane_id(); - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { vint texel_partition(pi.partition_of_texel + i); @@ -145,7 +145,7 @@ static void compute_partition_averages_rgb( vfloatacc pp_avg_rgb[3][3] {}; vint lane_id = vint::lane_id(); - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { vint texel_partition(pi.partition_of_texel + i); @@ -221,7 +221,7 @@ static void compute_partition_averages_rgba( vfloat4 averages[BLOCK_MAX_PARTITIONS] ) { unsigned int partition_count = pi.partition_count; - unsigned int texel_count = blk.texel_count; + size_t texel_count = blk.texel_count; promise(texel_count > 0); // For 1 partition just use the precomputed mean @@ -235,7 +235,7 @@ static void compute_partition_averages_rgba( vfloat4 pp_avg_rgba[4] {}; vint lane_id = vint::lane_id(); - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { vint texel_partition(pi.partition_of_texel + i); @@ -275,7 +275,7 @@ static void compute_partition_averages_rgba( vfloat4 pp_avg_rgba[2][4] {}; vint lane_id = vint::lane_id(); - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { vint texel_partition(pi.partition_of_texel + i); @@ -326,7 +326,7 @@ static void compute_partition_averages_rgba( vfloat4 pp_avg_rgba[3][4] {}; vint lane_id = vint::lane_id(); - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { vint texel_partition(pi.partition_of_texel + i); @@ -390,17 +390,17 @@ void compute_avgs_and_dirs_4_comp( const image_block& blk, partition_metrics pm[BLOCK_MAX_PARTITIONS] ) { - int partition_count = pi.partition_count; + size_t partition_count = pi.partition_count; promise(partition_count > 0); // Pre-compute partition_averages vfloat4 partition_averages[BLOCK_MAX_PARTITIONS]; compute_partition_averages_rgba(pi, blk, partition_averages); - for (int partition = 0; partition < partition_count; partition++) + for (size_t partition = 0; partition < partition_count; partition++) { const uint8_t *texel_indexes = pi.texels_of_partition[partition]; - unsigned int texel_count = pi.partition_texel_count[partition]; + size_t texel_count = pi.partition_texel_count[partition]; promise(texel_count > 0); vfloat4 average = partition_averages[partition]; @@ -411,7 +411,7 @@ void compute_avgs_and_dirs_4_comp( vfloat4 sum_zp = vfloat4::zero(); vfloat4 sum_wp = vfloat4::zero(); - for (unsigned int i = 0; i < texel_count; i++) + for (size_t i = 0; i < texel_count; i++) { unsigned int iwt = texel_indexes[i]; vfloat4 texel_datum = blk.texel(iwt); @@ -509,13 +509,13 @@ void compute_avgs_and_dirs_3_comp( partition_averages[3] = partition_averages[3].swz<0, 1, 2>(); } - unsigned int partition_count = pi.partition_count; + size_t partition_count = pi.partition_count; promise(partition_count > 0); - for (unsigned int partition = 0; partition < partition_count; partition++) + for (size_t partition = 0; partition < partition_count; partition++) { const uint8_t *texel_indexes = pi.texels_of_partition[partition]; - unsigned int texel_count = pi.partition_texel_count[partition]; + size_t texel_count = pi.partition_texel_count[partition]; promise(texel_count > 0); vfloat4 average = partition_averages[partition]; @@ -525,7 +525,7 @@ void compute_avgs_and_dirs_3_comp( vfloat4 sum_yp = vfloat4::zero(); vfloat4 sum_zp = vfloat4::zero(); - for (unsigned int i = 0; i < texel_count; i++) + for (size_t i = 0; i < texel_count; i++) { unsigned int iwt = texel_indexes[i]; @@ -570,17 +570,17 @@ void compute_avgs_and_dirs_3_comp_rgb( const image_block& blk, partition_metrics pm[BLOCK_MAX_PARTITIONS] ) { - unsigned int partition_count = pi.partition_count; + size_t partition_count = pi.partition_count; promise(partition_count > 0); // Pre-compute partition_averages vfloat4 partition_averages[BLOCK_MAX_PARTITIONS]; compute_partition_averages_rgb(pi, blk, partition_averages); - for (unsigned int partition = 0; partition < partition_count; partition++) + for (size_t partition = 0; partition < partition_count; partition++) { const uint8_t *texel_indexes = pi.texels_of_partition[partition]; - unsigned int texel_count = pi.partition_texel_count[partition]; + size_t texel_count = pi.partition_texel_count[partition]; promise(texel_count > 0); vfloat4 average = partition_averages[partition]; @@ -590,7 +590,7 @@ void compute_avgs_and_dirs_3_comp_rgb( vfloat4 sum_yp = vfloat4::zero(); vfloat4 sum_zp = vfloat4::zero(); - for (unsigned int i = 0; i < texel_count; i++) + for (size_t i = 0; i < texel_count; i++) { unsigned int iwt = texel_indexes[i]; @@ -664,20 +664,20 @@ void compute_avgs_and_dirs_2_comp( data_vg = blk.data_b; } - unsigned int partition_count = pt.partition_count; + size_t partition_count = pt.partition_count; promise(partition_count > 0); - for (unsigned int partition = 0; partition < partition_count; partition++) + for (size_t partition = 0; partition < partition_count; partition++) { const uint8_t *texel_indexes = pt.texels_of_partition[partition]; - unsigned int texel_count = pt.partition_texel_count[partition]; + size_t texel_count = pt.partition_texel_count[partition]; promise(texel_count > 0); // Only compute a partition mean if more than one partition if (partition_count > 1) { average = vfloat4::zero(); - for (unsigned int i = 0; i < texel_count; i++) + for (size_t i = 0; i < texel_count; i++) { unsigned int iwt = texel_indexes[i]; average += vfloat2(data_vr[iwt], data_vg[iwt]); @@ -691,7 +691,7 @@ void compute_avgs_and_dirs_2_comp( vfloat4 sum_xp = vfloat4::zero(); vfloat4 sum_yp = vfloat4::zero(); - for (unsigned int i = 0; i < texel_count; i++) + for (size_t i = 0; i < texel_count; i++) { unsigned int iwt = texel_indexes[i]; vfloat4 texel_datum = vfloat2(data_vr[iwt], data_vg[iwt]); @@ -729,20 +729,20 @@ void compute_error_squared_rgba( float& uncor_error, float& samec_error ) { - unsigned int partition_count = pi.partition_count; + size_t partition_count = pi.partition_count; promise(partition_count > 0); vfloatacc uncor_errorsumv = vfloatacc::zero(); vfloatacc samec_errorsumv = vfloatacc::zero(); - for (unsigned int partition = 0; partition < partition_count; partition++) + for (size_t partition = 0; partition < partition_count; partition++) { const uint8_t *texel_indexes = pi.texels_of_partition[partition]; processed_line4 l_uncor = uncor_plines[partition]; processed_line4 l_samec = samec_plines[partition]; - unsigned int texel_count = pi.partition_texel_count[partition]; + size_t texel_count = pi.partition_texel_count[partition]; promise(texel_count > 0); // Vectorize some useful scalar inputs @@ -775,7 +775,7 @@ void compute_error_squared_rgba( // array to extend the last value. This means min/max are not impacted, but we need to mask // out the dummy values when we compute the line weighting. vint lane_ids = vint::lane_id(); - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { vmask mask = lane_ids < vint(texel_count); const uint8_t* texel_idxs = texel_indexes + i; @@ -847,17 +847,17 @@ void compute_error_squared_rgb( float& uncor_error, float& samec_error ) { - unsigned int partition_count = pi.partition_count; + size_t partition_count = pi.partition_count; promise(partition_count > 0); vfloatacc uncor_errorsumv = vfloatacc::zero(); vfloatacc samec_errorsumv = vfloatacc::zero(); - for (unsigned int partition = 0; partition < partition_count; partition++) + for (size_t partition = 0; partition < partition_count; partition++) { partition_lines3& pl = plines[partition]; const uint8_t *texel_indexes = pi.texels_of_partition[partition]; - unsigned int texel_count = pi.partition_texel_count[partition]; + size_t texel_count = pi.partition_texel_count[partition]; promise(texel_count > 0); processed_line3 l_uncor = pl.uncor_pline; @@ -889,7 +889,7 @@ void compute_error_squared_rgb( // to extend the last value. This means min/max are not impacted, but we need to mask // out the dummy values when we compute the line weighting. vint lane_ids = vint::lane_id(); - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { vmask mask = lane_ids < vint(texel_count); const uint8_t* texel_idxs = texel_indexes + i;