diff --git a/Source/astcenc.h b/Source/astcenc.h index 8ecdc16f..da96cdeb 100644 --- a/Source/astcenc.h +++ b/Source/astcenc.h @@ -317,7 +317,7 @@ extern "C" typedef void (*astcenc_progress_callback)(float); * be e.g. rrrg (the default ordering for ASTC normals on the command line) or gggr (the ordering * used by BC5n). */ -static const unsigned int ASTCENC_FLG_MAP_NORMAL = 1 << 0; +static const size_t ASTCENC_FLG_MAP_NORMAL = 1 << 0; /** * @brief Enable compression heuristics that assume use of decode_unorm8 decode mode. @@ -330,7 +330,7 @@ static const unsigned int ASTCENC_FLG_MAP_NORMAL = 1 << 0; * Note that LDR_SRGB images will always use decode_unorm8 for the RGB channels, irrespective of * this setting. */ -static const unsigned int ASTCENC_FLG_USE_DECODE_UNORM8 = 1 << 1; +static const size_t ASTCENC_FLG_USE_DECODE_UNORM8 = 1 << 1; /** * @brief Enable alpha weighting. @@ -339,7 +339,7 @@ static const unsigned int ASTCENC_FLG_USE_DECODE_UNORM8 = 1 << 1; * the transparency level. This allows the codec to more accurately encode the alpha value in areas * where the color value is less significant. */ -static const unsigned int ASTCENC_FLG_USE_ALPHA_WEIGHT = 1 << 2; +static const size_t ASTCENC_FLG_USE_ALPHA_WEIGHT = 1 << 2; /** * @brief Enable perceptual error metrics. @@ -347,7 +347,7 @@ static const unsigned int ASTCENC_FLG_USE_ALPHA_WEIGHT = 1 << 2; * This mode enables perceptual compression mode, which will optimize for perceptual error rather * than best PSNR. Only some input modes support perceptual error metrics. */ -static const unsigned int ASTCENC_FLG_USE_PERCEPTUAL = 1 << 3; +static const size_t ASTCENC_FLG_USE_PERCEPTUAL = 1 << 3; /** * @brief Create a decompression-only context. @@ -355,7 +355,7 @@ static const unsigned int ASTCENC_FLG_USE_PERCEPTUAL = 1 << 3; * This mode disables support for compression. This enables context allocation to skip some * transient buffer allocation, resulting in lower memory usage. */ -static const unsigned int ASTCENC_FLG_DECOMPRESS_ONLY = 1 << 4; +static const size_t ASTCENC_FLG_DECOMPRESS_ONLY = 1 << 4; /** * @brief Create a self-decompression context. @@ -365,7 +365,7 @@ static const unsigned int ASTCENC_FLG_DECOMPRESS_ONLY = 1 << 4; * cases, and setting this flag enables additional optimizations, but does mean that the context * cannot reliably decompress arbitrary ASTC images. */ -static const unsigned int ASTCENC_FLG_SELF_DECOMPRESS_ONLY = 1 << 5; +static const size_t ASTCENC_FLG_SELF_DECOMPRESS_ONLY = 1 << 5; /** * @brief Enable RGBM map compression. @@ -388,12 +388,12 @@ static const unsigned int ASTCENC_FLG_SELF_DECOMPRESS_ONLY = 1 << 5; * scale, ensuring that the M value is accurately encoded. This defaults to 10 when in RGBM mode, * matching the default scale factor. */ -static const unsigned int ASTCENC_FLG_MAP_RGBM = 1 << 6; +static const size_t ASTCENC_FLG_MAP_RGBM = 1 << 6; /** * @brief The bit mask of all valid flags. */ -static const unsigned int ASTCENC_ALL_FLAGS = +static const size_t ASTCENC_ALL_FLAGS = ASTCENC_FLG_MAP_NORMAL | ASTCENC_FLG_MAP_RGBM | ASTCENC_FLG_USE_ALPHA_WEIGHT | @@ -418,16 +418,16 @@ struct astcenc_config astcenc_profile profile; /** @brief The set of set flags. */ - unsigned int flags; + size_t flags; /** @brief The ASTC block size X dimension. */ - unsigned int block_x; + size_t block_x; /** @brief The ASTC block size Y dimension. */ - unsigned int block_y; + size_t block_y; /** @brief The ASTC block size Z dimension. 
*/ - unsigned int block_z; + size_t block_z; /** @brief The red component weight scale for error weighting (-cw). */ float cw_r_weight; @@ -448,7 +448,7 @@ struct astcenc_config * will be sampled using linear texture filtering to minimize color bleed out of transparent * texels that are adjacent to non-transparent texels. */ - unsigned int a_scale_radius; + size_t a_scale_radius; /** @brief The RGBM scale factor for the shared multiplier (-rgbm). */ float rgbm_m_scale; @@ -458,35 +458,35 @@ struct astcenc_config * * Valid values are between 1 and 4. */ - unsigned int tune_partition_count_limit; + size_t tune_partition_count_limit; /** * @brief The maximum number of partitions searched (-2partitionindexlimit). * * Valid values are between 1 and 1024. */ - unsigned int tune_2partition_index_limit; + size_t tune_2partition_index_limit; /** * @brief The maximum number of partitions searched (-3partitionindexlimit). * * Valid values are between 1 and 1024. */ - unsigned int tune_3partition_index_limit; + size_t tune_3partition_index_limit; /** * @brief The maximum number of partitions searched (-4partitionindexlimit). * * Valid values are between 1 and 1024. */ - unsigned int tune_4partition_index_limit; + size_t tune_4partition_index_limit; /** * @brief The maximum centile for block modes searched (-blockmodelimit). * * Valid values are between 1 and 100. */ - unsigned int tune_block_mode_limit; + size_t tune_block_mode_limit; /** * @brief The maximum iterative refinements applied (-refinementlimit). @@ -494,35 +494,35 @@ struct astcenc_config * Valid values are between 1 and N; there is no technical upper limit * but little benefit is expected after N=4. */ - unsigned int tune_refinement_limit; + size_t tune_refinement_limit; /** * @brief The number of trial candidates per mode search (-candidatelimit). * * Valid values are between 1 and TUNE_MAX_TRIAL_CANDIDATES. */ - unsigned int tune_candidate_limit; + size_t tune_candidate_limit; /** * @brief The number of trial partitionings per search (-2partitioncandidatelimit). * * Valid values are between 1 and TUNE_MAX_PARTITIONING_CANDIDATES. */ - unsigned int tune_2partitioning_candidate_limit; + size_t tune_2partitioning_candidate_limit; /** * @brief The number of trial partitionings per search (-3partitioncandidatelimit). * * Valid values are between 1 and TUNE_MAX_PARTITIONING_CANDIDATES. */ - unsigned int tune_3partitioning_candidate_limit; + size_t tune_3partitioning_candidate_limit; /** * @brief The number of trial partitionings per search (-4partitioncandidatelimit). * * Valid values are between 1 and TUNE_MAX_PARTITIONING_CANDIDATES. */ - unsigned int tune_4partitioning_candidate_limit; + size_t tune_4partitioning_candidate_limit; /** * @brief The dB threshold for stopping block search (-dblimit). @@ -601,13 +601,13 @@ struct astcenc_config struct astcenc_image { /** @brief The X dimension of the image, in texels. */ - unsigned int dim_x; + size_t dim_x; /** @brief The Y dimension of the image, in texels. */ - unsigned int dim_y; + size_t dim_y; /** @brief The Z dimension of the image, in texels. */ - unsigned int dim_z; + size_t dim_z; /** @brief The data type per component. */ astcenc_type data_type; @@ -628,16 +628,16 @@ struct astcenc_block_info astcenc_profile profile; /** @brief The number of texels in the X dimension. */ - unsigned int block_x; + size_t block_x; /** @brief The number of texels in the Y dimension. */ - unsigned int block_y; + size_t block_y; /** @brief The number of texel in the Z dimension. 
*/ - unsigned int block_z; + size_t block_z; /** @brief The number of texels in the block. */ - unsigned int texel_count; + size_t texel_count; /** @brief True if this block is an error block. */ bool is_error_block; @@ -652,31 +652,31 @@ struct astcenc_block_info bool is_dual_plane_block; /** @brief The number of partitions if not constant color. */ - unsigned int partition_count; + size_t partition_count; /** @brief The partition index if 2 - 4 partitions used. */ - unsigned int partition_index; + size_t partition_index; /** @brief The component index of the second plane if dual plane. */ - unsigned int dual_plane_component; + size_t dual_plane_component; /** @brief The color endpoint encoding mode for each partition. */ - unsigned int color_endpoint_modes[4]; + size_t color_endpoint_modes[4]; /** @brief The number of color endpoint quantization levels. */ - unsigned int color_level_count; + size_t color_level_count; /** @brief The number of weight quantization levels. */ - unsigned int weight_level_count; + size_t weight_level_count; /** @brief The number of weights in the X dimension. */ - unsigned int weight_x; + size_t weight_x; /** @brief The number of weights in the Y dimension. */ - unsigned int weight_y; + size_t weight_y; /** @brief The number of weights in the Z dimension. */ - unsigned int weight_z; + size_t weight_z; /** @brief The unpacked color endpoints for each partition. */ float color_endpoints[4][2][4]; @@ -712,11 +712,11 @@ struct astcenc_block_info */ ASTCENC_PUBLIC astcenc_error astcenc_config_init( astcenc_profile profile, - unsigned int block_x, - unsigned int block_y, - unsigned int block_z, + size_t block_x, + size_t block_y, + size_t block_z, float quality, - unsigned int flags, + size_t flags, astcenc_config* config); /** @@ -739,7 +739,7 @@ ASTCENC_PUBLIC astcenc_error astcenc_config_init( */ ASTCENC_PUBLIC astcenc_error astcenc_context_alloc( const astcenc_config* config, - unsigned int thread_count, + size_t thread_count, astcenc_context** context); /** @@ -766,7 +766,7 @@ ASTCENC_PUBLIC astcenc_error astcenc_compress_image( const astcenc_swizzle* swizzle, uint8_t* data_out, size_t data_len, - unsigned int thread_index); + size_t thread_index); /** * @brief Reset the codec state for a new compression. @@ -816,7 +816,7 @@ ASTCENC_PUBLIC astcenc_error astcenc_decompress_image( size_t data_len, astcenc_image* image_out, const astcenc_swizzle* swizzle, - unsigned int thread_index); + size_t thread_index); /** * @brief Reset the codec state for a new decompression. 
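Note on the header change above: the public entry points now take size_t for block dimensions, flags, thread counts, and thread indices. The snippet below is a minimal caller sketch exercising the updated signatures only; it assumes the parts of the API not shown in this diff (ASTCENC_PRF_LDR, ASTCENC_PRE_MEDIUM, ASTCENC_TYPE_U8, the ASTCENC_SWZ_* values, ASTCENC_SUCCESS, and astcenc_context_free) keep their existing definitions.

// Sketch only: drives the size_t-based signatures from this header change.
// Enums and astcenc_context_free() are assumed unchanged from the current API.
#include <cstddef>
#include <cstdint>
#include <vector>
#include "astcenc.h"

bool compress_ldr_6x6(uint8_t* rgba8, size_t dim_x, size_t dim_y, std::vector<uint8_t>& out)
{
	static const size_t block_x = 6;
	static const size_t block_y = 6;

	astcenc_config config;
	astcenc_error status = astcenc_config_init(
	    ASTCENC_PRF_LDR, block_x, block_y, 1, ASTCENC_PRE_MEDIUM,
	    ASTCENC_FLG_USE_ALPHA_WEIGHT, &config);
	if (status != ASTCENC_SUCCESS)
	{
		return false;
	}

	astcenc_context* context = nullptr;
	status = astcenc_context_alloc(&config, 1, &context);
	if (status != ASTCENC_SUCCESS)
	{
		return false;
	}

	// One 2D slice of 8-bit RGBA data; ASTC always emits 16 bytes per block
	astcenc_image image;
	image.dim_x = dim_x;
	image.dim_y = dim_y;
	image.dim_z = 1;
	image.data_type = ASTCENC_TYPE_U8;
	void* slices[1] = { rgba8 };
	image.data = slices;

	size_t blocks_x = (dim_x + block_x - 1) / block_x;
	size_t blocks_y = (dim_y + block_y - 1) / block_y;
	out.resize(blocks_x * blocks_y * 16);

	const astcenc_swizzle swizzle { ASTCENC_SWZ_R, ASTCENC_SWZ_G, ASTCENC_SWZ_B, ASTCENC_SWZ_A };

	// Single-threaded compression, so the (now size_t) thread_index is 0
	status = astcenc_compress_image(context, &image, &swizzle, out.data(), out.size(), 0);

	astcenc_context_free(context);
	return status == ASTCENC_SUCCESS;
}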
diff --git a/Source/astcenc_averages_and_directions.cpp b/Source/astcenc_averages_and_directions.cpp index 8e2f8d8c..3864ece2 100644 --- a/Source/astcenc_averages_and_directions.cpp +++ b/Source/astcenc_averages_and_directions.cpp @@ -49,8 +49,8 @@ static void compute_partition_averages_rgb( const image_block& blk, vfloat4 averages[BLOCK_MAX_PARTITIONS] ) { - unsigned int partition_count = pi.partition_count; - unsigned int texel_count = blk.texel_count; + size_t partition_count = pi.partition_count; + size_t texel_count = blk.texel_count; promise(texel_count > 0); // For 1 partition just use the precomputed mean @@ -64,7 +64,7 @@ static void compute_partition_averages_rgb( vfloatacc pp_avg_rgb[3] {}; vint lane_id = vint::lane_id(); - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { vint texel_partition(pi.partition_of_texel + i); @@ -100,7 +100,7 @@ static void compute_partition_averages_rgb( vfloatacc pp_avg_rgb[2][3] {}; vint lane_id = vint::lane_id(); - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { vint texel_partition(pi.partition_of_texel + i); @@ -145,7 +145,7 @@ static void compute_partition_averages_rgb( vfloatacc pp_avg_rgb[3][3] {}; vint lane_id = vint::lane_id(); - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { vint texel_partition(pi.partition_of_texel + i); @@ -220,8 +220,8 @@ static void compute_partition_averages_rgba( const image_block& blk, vfloat4 averages[BLOCK_MAX_PARTITIONS] ) { - unsigned int partition_count = pi.partition_count; - unsigned int texel_count = blk.texel_count; + size_t partition_count = pi.partition_count; + size_t texel_count = blk.texel_count; promise(texel_count > 0); // For 1 partition just use the precomputed mean @@ -235,7 +235,7 @@ static void compute_partition_averages_rgba( vfloat4 pp_avg_rgba[4] {}; vint lane_id = vint::lane_id(); - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { vint texel_partition(pi.partition_of_texel + i); @@ -275,7 +275,7 @@ static void compute_partition_averages_rgba( vfloat4 pp_avg_rgba[2][4] {}; vint lane_id = vint::lane_id(); - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { vint texel_partition(pi.partition_of_texel + i); @@ -326,7 +326,7 @@ static void compute_partition_averages_rgba( vfloat4 pp_avg_rgba[3][4] {}; vint lane_id = vint::lane_id(); - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { vint texel_partition(pi.partition_of_texel + i); @@ -400,7 +400,7 @@ void compute_avgs_and_dirs_4_comp( for (int partition = 0; partition < partition_count; partition++) { const uint8_t *texel_indexes = pi.texels_of_partition[partition]; - unsigned int texel_count = pi.partition_texel_count[partition]; + size_t texel_count = pi.partition_texel_count[partition]; promise(texel_count > 0); vfloat4 average = partition_averages[partition]; @@ -411,9 +411,9 @@ void compute_avgs_and_dirs_4_comp( vfloat4 sum_zp = vfloat4::zero(); vfloat4 sum_wp = vfloat4::zero(); - for (unsigned int i = 0; i < texel_count; i++) + for (size_t i = 0; i < texel_count; i++) { - unsigned int iwt = texel_indexes[i]; + size_t iwt = texel_indexes[i]; vfloat4 
texel_datum = blk.texel(iwt); texel_datum = texel_datum - average; @@ -459,7 +459,7 @@ void compute_avgs_and_dirs_4_comp( void compute_avgs_and_dirs_3_comp( const partition_info& pi, const image_block& blk, - unsigned int omitted_component, + size_t omitted_component, partition_metrics pm[BLOCK_MAX_PARTITIONS] ) { // Pre-compute partition_averages @@ -509,13 +509,13 @@ void compute_avgs_and_dirs_3_comp( partition_averages[3] = partition_averages[3].swz<0, 1, 2>(); } - unsigned int partition_count = pi.partition_count; + size_t partition_count = pi.partition_count; promise(partition_count > 0); - for (unsigned int partition = 0; partition < partition_count; partition++) + for (size_t partition = 0; partition < partition_count; partition++) { const uint8_t *texel_indexes = pi.texels_of_partition[partition]; - unsigned int texel_count = pi.partition_texel_count[partition]; + size_t texel_count = pi.partition_texel_count[partition]; promise(texel_count > 0); vfloat4 average = partition_averages[partition]; @@ -525,9 +525,9 @@ void compute_avgs_and_dirs_3_comp( vfloat4 sum_yp = vfloat4::zero(); vfloat4 sum_zp = vfloat4::zero(); - for (unsigned int i = 0; i < texel_count; i++) + for (size_t i = 0; i < texel_count; i++) { - unsigned int iwt = texel_indexes[i]; + size_t iwt = texel_indexes[i]; vfloat4 texel_datum = vfloat3(data_vr[iwt], data_vg[iwt], @@ -570,17 +570,17 @@ void compute_avgs_and_dirs_3_comp_rgb( const image_block& blk, partition_metrics pm[BLOCK_MAX_PARTITIONS] ) { - unsigned int partition_count = pi.partition_count; + size_t partition_count = pi.partition_count; promise(partition_count > 0); // Pre-compute partition_averages vfloat4 partition_averages[BLOCK_MAX_PARTITIONS]; compute_partition_averages_rgb(pi, blk, partition_averages); - for (unsigned int partition = 0; partition < partition_count; partition++) + for (size_t partition = 0; partition < partition_count; partition++) { const uint8_t *texel_indexes = pi.texels_of_partition[partition]; - unsigned int texel_count = pi.partition_texel_count[partition]; + size_t texel_count = pi.partition_texel_count[partition]; promise(texel_count > 0); vfloat4 average = partition_averages[partition]; @@ -590,9 +590,9 @@ void compute_avgs_and_dirs_3_comp_rgb( vfloat4 sum_yp = vfloat4::zero(); vfloat4 sum_zp = vfloat4::zero(); - for (unsigned int i = 0; i < texel_count; i++) + for (size_t i = 0; i < texel_count; i++) { - unsigned int iwt = texel_indexes[i]; + size_t iwt = texel_indexes[i]; vfloat4 texel_datum = blk.texel3(iwt); texel_datum = texel_datum - average; @@ -631,8 +631,8 @@ void compute_avgs_and_dirs_3_comp_rgb( void compute_avgs_and_dirs_2_comp( const partition_info& pt, const image_block& blk, - unsigned int component1, - unsigned int component2, + size_t component1, + size_t component2, partition_metrics pm[BLOCK_MAX_PARTITIONS] ) { vfloat4 average; @@ -664,22 +664,22 @@ void compute_avgs_and_dirs_2_comp( data_vg = blk.data_b; } - unsigned int partition_count = pt.partition_count; + size_t partition_count = pt.partition_count; promise(partition_count > 0); - for (unsigned int partition = 0; partition < partition_count; partition++) + for (size_t partition = 0; partition < partition_count; partition++) { const uint8_t *texel_indexes = pt.texels_of_partition[partition]; - unsigned int texel_count = pt.partition_texel_count[partition]; + size_t texel_count = pt.partition_texel_count[partition]; promise(texel_count > 0); // Only compute a partition mean if more than one partition if (partition_count > 1) { average = vfloat4::zero(); 
- for (unsigned int i = 0; i < texel_count; i++) + for (size_t i = 0; i < texel_count; i++) { - unsigned int iwt = texel_indexes[i]; + size_t iwt = texel_indexes[i]; average += vfloat2(data_vr[iwt], data_vg[iwt]); } @@ -691,9 +691,9 @@ void compute_avgs_and_dirs_2_comp( vfloat4 sum_xp = vfloat4::zero(); vfloat4 sum_yp = vfloat4::zero(); - for (unsigned int i = 0; i < texel_count; i++) + for (size_t i = 0; i < texel_count; i++) { - unsigned int iwt = texel_indexes[i]; + size_t iwt = texel_indexes[i]; vfloat4 texel_datum = vfloat2(data_vr[iwt], data_vg[iwt]); texel_datum = texel_datum - average; @@ -729,20 +729,20 @@ void compute_error_squared_rgba( float& uncor_error, float& samec_error ) { - unsigned int partition_count = pi.partition_count; + size_t partition_count = pi.partition_count; promise(partition_count > 0); vfloatacc uncor_errorsumv = vfloatacc::zero(); vfloatacc samec_errorsumv = vfloatacc::zero(); - for (unsigned int partition = 0; partition < partition_count; partition++) + for (size_t partition = 0; partition < partition_count; partition++) { const uint8_t *texel_indexes = pi.texels_of_partition[partition]; processed_line4 l_uncor = uncor_plines[partition]; processed_line4 l_samec = samec_plines[partition]; - unsigned int texel_count = pi.partition_texel_count[partition]; + size_t texel_count = pi.partition_texel_count[partition]; promise(texel_count > 0); // Vectorize some useful scalar inputs @@ -775,7 +775,7 @@ void compute_error_squared_rgba( // array to extend the last value. This means min/max are not impacted, but we need to mask // out the dummy values when we compute the line weighting. vint lane_ids = vint::lane_id(); - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { vmask mask = lane_ids < vint(texel_count); const uint8_t* texel_idxs = texel_indexes + i; @@ -847,17 +847,17 @@ void compute_error_squared_rgb( float& uncor_error, float& samec_error ) { - unsigned int partition_count = pi.partition_count; + size_t partition_count = pi.partition_count; promise(partition_count > 0); vfloatacc uncor_errorsumv = vfloatacc::zero(); vfloatacc samec_errorsumv = vfloatacc::zero(); - for (unsigned int partition = 0; partition < partition_count; partition++) + for (size_t partition = 0; partition < partition_count; partition++) { partition_lines3& pl = plines[partition]; const uint8_t *texel_indexes = pi.texels_of_partition[partition]; - unsigned int texel_count = pi.partition_texel_count[partition]; + size_t texel_count = pi.partition_texel_count[partition]; promise(texel_count > 0); processed_line3 l_uncor = pl.uncor_pline; @@ -889,7 +889,7 @@ void compute_error_squared_rgb( // to extend the last value. This means min/max are not impacted, but we need to mask // out the dummy values when we compute the line weighting. vint lane_ids = vint::lane_id(); - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { vmask mask = lane_ids < vint(texel_count); const uint8_t* texel_idxs = texel_indexes + i; diff --git a/Source/astcenc_block_sizes.cpp b/Source/astcenc_block_sizes.cpp index 17e51dfc..f91b394c 100644 --- a/Source/astcenc_block_sizes.cpp +++ b/Source/astcenc_block_sizes.cpp @@ -34,17 +34,17 @@ * @return Returns true if a valid mode, false otherwise. 
*/ static bool decode_block_mode_2d( - unsigned int block_mode, - unsigned int& x_weights, - unsigned int& y_weights, + size_t block_mode, + size_t& x_weights, + size_t& y_weights, bool& is_dual_plane, - unsigned int& quant_mode, - unsigned int& weight_bits + size_t& quant_mode, + size_t& weight_bits ) { - unsigned int base_quant_mode = (block_mode >> 4) & 1; - unsigned int H = (block_mode >> 9) & 1; - unsigned int D = (block_mode >> 10) & 1; - unsigned int A = (block_mode >> 5) & 0x3; + size_t base_quant_mode = (block_mode >> 4) & 1; + size_t H = (block_mode >> 9) & 1; + size_t D = (block_mode >> 10) & 1; + size_t A = (block_mode >> 5) & 0x3; x_weights = 0; y_weights = 0; @@ -52,7 +52,7 @@ static bool decode_block_mode_2d( if ((block_mode & 3) != 0) { base_quant_mode |= (block_mode & 3) << 1; - unsigned int B = (block_mode >> 7) & 3; + size_t B = (block_mode >> 7) & 3; switch ((block_mode >> 2) & 3) { case 0: @@ -90,7 +90,7 @@ static bool decode_block_mode_2d( return false; } - unsigned int B = (block_mode >> 9) & 3; + size_t B = (block_mode >> 9) & 3; switch ((block_mode >> 7) & 3) { case 0: @@ -126,7 +126,7 @@ static bool decode_block_mode_2d( } } - unsigned int weight_count = x_weights * y_weights * (D + 1); + size_t weight_count = x_weights * y_weights * (D + 1); quant_mode = (base_quant_mode - 2) + 6 * H; is_dual_plane = D != 0; @@ -150,18 +150,18 @@ static bool decode_block_mode_2d( * @return Returns true if a valid mode, false otherwise. */ static bool decode_block_mode_3d( - unsigned int block_mode, - unsigned int& x_weights, - unsigned int& y_weights, - unsigned int& z_weights, + size_t block_mode, + size_t& x_weights, + size_t& y_weights, + size_t& z_weights, bool& is_dual_plane, - unsigned int& quant_mode, - unsigned int& weight_bits + size_t& quant_mode, + size_t& weight_bits ) { - unsigned int base_quant_mode = (block_mode >> 4) & 1; - unsigned int H = (block_mode >> 9) & 1; - unsigned int D = (block_mode >> 10) & 1; - unsigned int A = (block_mode >> 5) & 0x3; + size_t base_quant_mode = (block_mode >> 4) & 1; + size_t H = (block_mode >> 9) & 1; + size_t D = (block_mode >> 10) & 1; + size_t A = (block_mode >> 5) & 0x3; x_weights = 0; y_weights = 0; @@ -170,8 +170,8 @@ static bool decode_block_mode_3d( if ((block_mode & 3) != 0) { base_quant_mode |= (block_mode & 3) << 1; - unsigned int B = (block_mode >> 7) & 3; - unsigned int C = (block_mode >> 2) & 0x3; + size_t B = (block_mode >> 7) & 3; + size_t C = (block_mode >> 2) & 0x3; x_weights = A + 2; y_weights = B + 2; z_weights = C + 2; @@ -229,7 +229,7 @@ static bool decode_block_mode_3d( } } - unsigned int weight_count = x_weights * y_weights * z_weights * (D + 1); + size_t weight_count = x_weights * y_weights * z_weights * (D + 1); quant_mode = (base_quant_mode - 2) + 6 * H; is_dual_plane = D != 0; @@ -250,15 +250,15 @@ static bool decode_block_mode_3d( * @param[out] wb The decimation table init scratch working buffers. 
*/ static void init_decimation_info_2d( - unsigned int x_texels, - unsigned int y_texels, - unsigned int x_weights, - unsigned int y_weights, + size_t x_texels, + size_t y_texels, + size_t x_weights, + size_t y_weights, decimation_info& di, dt_init_working_buffers& wb ) { - unsigned int texels_per_block = x_texels * y_texels; - unsigned int weights_per_block = x_weights * y_weights; + size_t texels_per_block = x_texels * y_texels; + size_t weights_per_block = x_weights * y_weights; uint8_t max_texel_count_of_weight = 0; @@ -267,46 +267,46 @@ static void init_decimation_info_2d( promise(x_texels > 0); promise(y_texels > 0); - for (unsigned int i = 0; i < weights_per_block; i++) + for (size_t i = 0; i < weights_per_block; i++) { wb.texel_count_of_weight[i] = 0; } - for (unsigned int i = 0; i < texels_per_block; i++) + for (size_t i = 0; i < texels_per_block; i++) { wb.weight_count_of_texel[i] = 0; } - for (unsigned int y = 0; y < y_texels; y++) + for (size_t y = 0; y < y_texels; y++) { - for (unsigned int x = 0; x < x_texels; x++) + for (size_t x = 0; x < x_texels; x++) { - unsigned int texel = y * x_texels + x; + size_t texel = y * x_texels + x; - unsigned int x_weight = (((1024 + x_texels / 2) / (x_texels - 1)) * x * (x_weights - 1) + 32) >> 6; - unsigned int y_weight = (((1024 + y_texels / 2) / (y_texels - 1)) * y * (y_weights - 1) + 32) >> 6; + size_t x_weight = (((1024 + x_texels / 2) / (x_texels - 1)) * x * (x_weights - 1) + 32) >> 6; + size_t y_weight = (((1024 + y_texels / 2) / (y_texels - 1)) * y * (y_weights - 1) + 32) >> 6; - unsigned int x_weight_frac = x_weight & 0xF; - unsigned int y_weight_frac = y_weight & 0xF; - unsigned int x_weight_int = x_weight >> 4; - unsigned int y_weight_int = y_weight >> 4; + size_t x_weight_frac = x_weight & 0xF; + size_t y_weight_frac = y_weight & 0xF; + size_t x_weight_int = x_weight >> 4; + size_t y_weight_int = y_weight >> 4; - unsigned int qweight[4]; + size_t qweight[4]; qweight[0] = x_weight_int + y_weight_int * x_weights; qweight[1] = qweight[0] + 1; qweight[2] = qweight[0] + x_weights; qweight[3] = qweight[2] + 1; // Truncated-precision bilinear interpolation - unsigned int prod = x_weight_frac * y_weight_frac; + size_t prod = x_weight_frac * y_weight_frac; - unsigned int weight[4]; + size_t weight[4]; weight[3] = (prod + 8) >> 4; weight[1] = x_weight_frac - weight[3]; weight[2] = y_weight_frac - weight[3]; weight[0] = 16 - x_weight_frac - y_weight_frac + weight[3]; - for (unsigned int i = 0; i < 4; i++) + for (size_t i = 0; i < 4; i++) { if (weight[i] != 0) { @@ -323,12 +323,12 @@ static void init_decimation_info_2d( } uint8_t max_texel_weight_count = 0; - for (unsigned int i = 0; i < texels_per_block; i++) + for (size_t i = 0; i < texels_per_block; i++) { di.texel_weight_count[i] = wb.weight_count_of_texel[i]; max_texel_weight_count = astc::max(max_texel_weight_count, di.texel_weight_count[i]); - for (unsigned int j = 0; j < wb.weight_count_of_texel[i]; j++) + for (size_t j = 0; j < wb.weight_count_of_texel[i]; j++) { di.texel_weight_contribs_int_tr[j][i] = wb.weights_of_texel[i][j]; di.texel_weight_contribs_float_tr[j][i] = static_cast(wb.weights_of_texel[i][j]) * (1.0f / WEIGHTS_TEXEL_SUM); @@ -336,7 +336,7 @@ static void init_decimation_info_2d( } // Init all 4 entries so we can rely on zeros for vectorization - for (unsigned int j = wb.weight_count_of_texel[i]; j < 4; j++) + for (size_t j = wb.weight_count_of_texel[i]; j < 4; j++) { di.texel_weight_contribs_int_tr[j][i] = 0; di.texel_weight_contribs_float_tr[j][i] = 0.0f; @@ -346,12 
+346,12 @@ static void init_decimation_info_2d( di.max_texel_weight_count = max_texel_weight_count; - for (unsigned int i = 0; i < weights_per_block; i++) + for (size_t i = 0; i < weights_per_block; i++) { - unsigned int texel_count_wt = wb.texel_count_of_weight[i]; + size_t texel_count_wt = wb.texel_count_of_weight[i]; di.weight_texel_count[i] = static_cast(texel_count_wt); - for (unsigned int j = 0; j < texel_count_wt; j++) + for (size_t j = 0; j < texel_count_wt; j++) { uint8_t texel = wb.texels_of_weight[i][j]; @@ -361,7 +361,7 @@ static void init_decimation_info_2d( // Store the per-texel contribution of this weight for each texel it contributes to di.texel_contrib_for_weight[j][i] = 0.0f; - for (unsigned int k = 0; k < 4; k++) + for (size_t k = 0; k < 4; k++) { uint8_t dttw = di.texel_weights_tr[k][texel]; float dttwf = di.texel_weight_contribs_float_tr[k][texel]; @@ -376,7 +376,7 @@ static void init_decimation_info_2d( // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails // Match last texel in active lane in SIMD group, for better gathers uint8_t last_texel = di.weight_texels_tr[texel_count_wt - 1][i]; - for (unsigned int j = texel_count_wt; j < max_texel_count_of_weight; j++) + for (size_t j = texel_count_wt; j < max_texel_count_of_weight; j++) { di.weight_texels_tr[j][i] = last_texel; di.weights_texel_contribs_tr[j][i] = 0.0f; @@ -384,12 +384,12 @@ static void init_decimation_info_2d( } // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails - unsigned int texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block); - for (unsigned int i = texels_per_block; i < texels_per_block_simd; i++) + size_t texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block); + for (size_t i = texels_per_block; i < texels_per_block_simd; i++) { di.texel_weight_count[i] = 0; - for (unsigned int j = 0; j < 4; j++) + for (size_t j = 0; j < 4; j++) { di.texel_weight_contribs_float_tr[j][i] = 0; di.texel_weights_tr[j][i] = 0; @@ -399,15 +399,15 @@ static void init_decimation_info_2d( // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails // Match last texel in active lane in SIMD group, for better gathers - unsigned int last_texel_count_wt = wb.texel_count_of_weight[weights_per_block - 1]; + size_t last_texel_count_wt = wb.texel_count_of_weight[weights_per_block - 1]; uint8_t last_texel = di.weight_texels_tr[last_texel_count_wt - 1][weights_per_block - 1]; - unsigned int weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block); - for (unsigned int i = weights_per_block; i < weights_per_block_simd; i++) + size_t weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block); + for (size_t i = weights_per_block; i < weights_per_block_simd; i++) { di.weight_texel_count[i] = 0; - for (unsigned int j = 0; j < max_texel_count_of_weight; j++) + for (size_t j = 0; j < max_texel_count_of_weight; j++) { di.weight_texels_tr[j][i] = last_texel; di.weights_texel_contribs_tr[j][i] = 0.0f; @@ -434,38 +434,38 @@ static void init_decimation_info_2d( @param[out] wb The decimation table init scratch working buffers. 
*/ static void init_decimation_info_3d( - unsigned int x_texels, - unsigned int y_texels, - unsigned int z_texels, - unsigned int x_weights, - unsigned int y_weights, - unsigned int z_weights, + size_t x_texels, + size_t y_texels, + size_t z_texels, + size_t x_weights, + size_t y_weights, + size_t z_weights, decimation_info& di, dt_init_working_buffers& wb ) { - unsigned int texels_per_block = x_texels * y_texels * z_texels; - unsigned int weights_per_block = x_weights * y_weights * z_weights; + size_t texels_per_block = x_texels * y_texels * z_texels; + size_t weights_per_block = x_weights * y_weights * z_weights; uint8_t max_texel_count_of_weight = 0; promise(weights_per_block > 0); promise(texels_per_block > 0); - for (unsigned int i = 0; i < weights_per_block; i++) + for (size_t i = 0; i < weights_per_block; i++) { wb.texel_count_of_weight[i] = 0; } - for (unsigned int i = 0; i < texels_per_block; i++) + for (size_t i = 0; i < texels_per_block; i++) { wb.weight_count_of_texel[i] = 0; } - for (unsigned int z = 0; z < z_texels; z++) + for (size_t z = 0; z < z_texels; z++) { - for (unsigned int y = 0; y < y_texels; y++) + for (size_t y = 0; y < y_texels; y++) { - for (unsigned int x = 0; x < x_texels; x++) + for (size_t x = 0; x < x_texels; x++) { int texel = (z * y_texels + y) * x_texels + x; @@ -561,7 +561,7 @@ static void init_decimation_info_3d( weight[2] = w2; weight[3] = w3; - for (unsigned int i = 0; i < 4; i++) + for (size_t i = 0; i < 4; i++) { if (weight[i] != 0) { @@ -579,20 +579,20 @@ static void init_decimation_info_3d( } uint8_t max_texel_weight_count = 0; - for (unsigned int i = 0; i < texels_per_block; i++) + for (size_t i = 0; i < texels_per_block; i++) { di.texel_weight_count[i] = wb.weight_count_of_texel[i]; max_texel_weight_count = astc::max(max_texel_weight_count, di.texel_weight_count[i]); // Init all 4 entries so we can rely on zeros for vectorization - for (unsigned int j = 0; j < 4; j++) + for (size_t j = 0; j < 4; j++) { di.texel_weight_contribs_int_tr[j][i] = 0; di.texel_weight_contribs_float_tr[j][i] = 0.0f; di.texel_weights_tr[j][i] = 0; } - for (unsigned int j = 0; j < wb.weight_count_of_texel[i]; j++) + for (size_t j = 0; j < wb.weight_count_of_texel[i]; j++) { di.texel_weight_contribs_int_tr[j][i] = wb.weights_of_texel[i][j]; di.texel_weight_contribs_float_tr[j][i] = static_cast(wb.weights_of_texel[i][j]) * (1.0f / WEIGHTS_TEXEL_SUM); @@ -602,14 +602,14 @@ static void init_decimation_info_3d( di.max_texel_weight_count = max_texel_weight_count; - for (unsigned int i = 0; i < weights_per_block; i++) + for (size_t i = 0; i < weights_per_block; i++) { - unsigned int texel_count_wt = wb.texel_count_of_weight[i]; + size_t texel_count_wt = wb.texel_count_of_weight[i]; di.weight_texel_count[i] = static_cast(texel_count_wt); - for (unsigned int j = 0; j < texel_count_wt; j++) + for (size_t j = 0; j < texel_count_wt; j++) { - unsigned int texel = wb.texels_of_weight[i][j]; + size_t texel = wb.texels_of_weight[i][j]; // Create transposed versions of these for better vectorization di.weight_texels_tr[j][i] = static_cast(texel); @@ -617,7 +617,7 @@ static void init_decimation_info_3d( // Store the per-texel contribution of this weight for each texel it contributes to di.texel_contrib_for_weight[j][i] = 0.0f; - for (unsigned int k = 0; k < 4; k++) + for (size_t k = 0; k < 4; k++) { uint8_t dttw = di.texel_weights_tr[k][texel]; float dttwf = di.texel_weight_contribs_float_tr[k][texel]; @@ -632,7 +632,7 @@ static void init_decimation_info_3d( // Initialize array tail so we 
can over-fetch with SIMD later to avoid loop tails // Match last texel in active lane in SIMD group, for better gathers uint8_t last_texel = di.weight_texels_tr[texel_count_wt - 1][i]; - for (unsigned int j = texel_count_wt; j < max_texel_count_of_weight; j++) + for (size_t j = texel_count_wt; j < max_texel_count_of_weight; j++) { di.weight_texels_tr[j][i] = last_texel; di.weights_texel_contribs_tr[j][i] = 0.0f; @@ -640,12 +640,12 @@ static void init_decimation_info_3d( } // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails - unsigned int texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block); - for (unsigned int i = texels_per_block; i < texels_per_block_simd; i++) + size_t texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block); + for (size_t i = texels_per_block; i < texels_per_block_simd; i++) { di.texel_weight_count[i] = 0; - for (unsigned int j = 0; j < 4; j++) + for (size_t j = 0; j < 4; j++) { di.texel_weight_contribs_float_tr[j][i] = 0; di.texel_weights_tr[j][i] = 0; @@ -658,8 +658,8 @@ static void init_decimation_info_3d( int last_texel_count_wt = wb.texel_count_of_weight[weights_per_block - 1]; uint8_t last_texel = di.weight_texels_tr[last_texel_count_wt - 1][weights_per_block - 1]; - unsigned int weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block); - for (unsigned int i = weights_per_block; i < weights_per_block_simd; i++) + size_t weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block); + for (size_t i = weights_per_block; i < weights_per_block_simd; i++) { di.weight_texel_count[i] = 0; @@ -711,7 +711,7 @@ static void assign_kmeans_texels( } // Assign 64 random indices, retrying if we see repeats - unsigned int arr_elements_set = 0; + size_t arr_elements_set = 0; while (arr_elements_set < BLOCK_MAX_KMEANS_TEXELS) { uint8_t texel = static_cast(astc::rand(rng_state)); @@ -736,15 +736,15 @@ static void assign_kmeans_texels( * @param index The packed array index to populate. */ static void construct_dt_entry_2d( - unsigned int x_texels, - unsigned int y_texels, - unsigned int x_weights, - unsigned int y_weights, + size_t x_texels, + size_t y_texels, + size_t x_weights, + size_t y_weights, block_size_descriptor& bsd, dt_init_working_buffers& wb, - unsigned int index + size_t index ) { - unsigned int weight_count = x_weights * y_weights; + size_t weight_count = x_weights * y_weights; assert(weight_count <= BLOCK_MAX_WEIGHTS); bool try_2planes = (2 * weight_count) <= BLOCK_MAX_WEIGHTS; @@ -756,7 +756,7 @@ static void construct_dt_entry_2d( int maxprec_2planes = -1; for (int i = 0; i < 12; i++) { - unsigned int bits_1plane = get_ise_sequence_bitcount(weight_count, static_cast(i)); + size_t bits_1plane = get_ise_sequence_bitcount(weight_count, static_cast(i)); if (bits_1plane >= BLOCK_MIN_WEIGHT_BITS && bits_1plane <= BLOCK_MAX_WEIGHT_BITS) { maxprec_1plane = i; @@ -764,7 +764,7 @@ static void construct_dt_entry_2d( if (try_2planes) { - unsigned int bits_2planes = get_ise_sequence_bitcount(2 * weight_count, static_cast(i)); + size_t bits_2planes = get_ise_sequence_bitcount(2 * weight_count, static_cast(i)); if (bits_2planes >= BLOCK_MIN_WEIGHT_BITS && bits_2planes <= BLOCK_MAX_WEIGHT_BITS) { maxprec_2planes = i; @@ -790,15 +790,15 @@ static void construct_dt_entry_2d( * @param[out] bsd The block size descriptor to populate. 
*/ static void construct_block_size_descriptor_2d( - unsigned int x_texels, - unsigned int y_texels, + size_t x_texels, + size_t y_texels, bool can_omit_modes, float mode_cutoff, block_size_descriptor& bsd ) { // Store a remap table for storing packed decimation modes. // Indexing uses [Y * 16 + X] and max size for each axis is 12. - static const unsigned int MAX_DMI = 12 * 16 + 12; + static const size_t MAX_DMI = 12 * 16 + 12; int decimation_mode_index[MAX_DMI]; dt_init_working_buffers* wb = new dt_init_working_buffers; @@ -808,7 +808,7 @@ static void construct_block_size_descriptor_2d( bsd.zdim = 1; bsd.texel_count = static_cast(x_texels * y_texels); - for (unsigned int i = 0; i < MAX_DMI; i++) + for (size_t i = 0; i < MAX_DMI; i++) { decimation_mode_index[i] = -1; } @@ -824,15 +824,15 @@ static void construct_block_size_descriptor_2d( #endif // Construct the list of block formats referencing the decimation tables - unsigned int packed_bm_idx = 0; - unsigned int packed_dm_idx = 0; + size_t packed_bm_idx = 0; + size_t packed_dm_idx = 0; // Trackers - unsigned int bm_counts[4] { 0 }; - unsigned int dm_counts[4] { 0 }; + size_t bm_counts[4] { 0 }; + size_t dm_counts[4] { 0 }; // Clear the list to a known-bad value - for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++) + for (size_t i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++) { bsd.block_mode_packed_index[i] = BLOCK_BAD_BLOCK_MODE; } @@ -842,10 +842,10 @@ static void construct_block_size_descriptor_2d( // - Pass 1 - keep selected single plane "non-always" block modes // - Pass 2 - keep select dual plane block modes // - Pass 3 - keep everything else that's legal - unsigned int limit = can_omit_modes ? 3 : 4; - for (unsigned int j = 0; j < limit; j ++) + size_t limit = can_omit_modes ? 3 : 4; + for (size_t j = 0; j < limit; j ++) { - for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++) + for (size_t i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++) { // Skip modes we've already included in a previous pass if (bsd.block_mode_packed_index[i] != BLOCK_BAD_BLOCK_MODE) @@ -854,11 +854,11 @@ static void construct_block_size_descriptor_2d( } // Decode parameters - unsigned int x_weights; - unsigned int y_weights; + size_t x_weights; + size_t y_weights; bool is_dual_plane; - unsigned int quant_mode; - unsigned int weight_bits; + size_t quant_mode; + size_t weight_bits; bool valid = decode_block_mode_2d(i, x_weights, y_weights, is_dual_plane, quant_mode, weight_bits); // Always skip invalid encodings for the current block size @@ -965,7 +965,7 @@ static void construct_block_size_descriptor_2d( #endif // Ensure the end of the array contains valid data (should never get read) - for (unsigned int i = bsd.decimation_mode_count_all; i < WEIGHTS_MAX_DECIMATION_MODES; i++) + for (size_t i = bsd.decimation_mode_count_all; i < WEIGHTS_MAX_DECIMATION_MODES; i++) { bsd.decimation_modes[i].maxprec_1plane = -1; bsd.decimation_modes[i].maxprec_2planes = -1; @@ -991,16 +991,16 @@ static void construct_block_size_descriptor_2d( * @param[out] bsd The block size descriptor to populate. */ static void construct_block_size_descriptor_3d( - unsigned int x_texels, - unsigned int y_texels, - unsigned int z_texels, + size_t x_texels, + size_t y_texels, + size_t z_texels, block_size_descriptor& bsd ) { // Store a remap table for storing packed decimation modes. // Indexing uses [Z * 64 + Y * 8 + X] and max size for each axis is 6. 
- static constexpr unsigned int MAX_DMI = 6 * 64 + 6 * 8 + 6; + static constexpr size_t MAX_DMI = 6 * 64 + 6 * 8 + 6; int decimation_mode_index[MAX_DMI]; - unsigned int decimation_mode_count = 0; + size_t decimation_mode_count = 0; dt_init_working_buffers* wb = new dt_init_working_buffers; @@ -1009,19 +1009,19 @@ static void construct_block_size_descriptor_3d( bsd.zdim = static_cast(z_texels); bsd.texel_count = static_cast(x_texels * y_texels * z_texels); - for (unsigned int i = 0; i < MAX_DMI; i++) + for (size_t i = 0; i < MAX_DMI; i++) { decimation_mode_index[i] = -1; } // gather all the infill-modes that can be used with the current block size - for (unsigned int x_weights = 2; x_weights <= x_texels; x_weights++) + for (size_t x_weights = 2; x_weights <= x_texels; x_weights++) { - for (unsigned int y_weights = 2; y_weights <= y_texels; y_weights++) + for (size_t y_weights = 2; y_weights <= y_texels; y_weights++) { - for (unsigned int z_weights = 2; z_weights <= z_texels; z_weights++) + for (size_t z_weights = 2; z_weights <= z_texels; z_weights++) { - unsigned int weight_count = x_weights * y_weights * z_weights; + size_t weight_count = x_weights * y_weights * z_weights; if (weight_count > BLOCK_MAX_WEIGHTS) { continue; @@ -1033,15 +1033,15 @@ static void construct_block_size_descriptor_3d( int maxprec_1plane = -1; int maxprec_2planes = -1; - for (unsigned int i = 0; i < 12; i++) + for (size_t i = 0; i < 12; i++) { - unsigned int bits_1plane = get_ise_sequence_bitcount(weight_count, static_cast(i)); + size_t bits_1plane = get_ise_sequence_bitcount(weight_count, static_cast(i)); if (bits_1plane >= BLOCK_MIN_WEIGHT_BITS && bits_1plane <= BLOCK_MAX_WEIGHT_BITS) { maxprec_1plane = i; } - unsigned int bits_2planes = get_ise_sequence_bitcount(2 * weight_count, static_cast(i)); + size_t bits_2planes = get_ise_sequence_bitcount(2 * weight_count, static_cast(i)); if (bits_2planes >= BLOCK_MIN_WEIGHT_BITS && bits_2planes <= BLOCK_MAX_WEIGHT_BITS) { maxprec_2planes = i; @@ -1063,7 +1063,7 @@ static void construct_block_size_descriptor_3d( } // Ensure the end of the array contains valid data (should never get read) - for (unsigned int i = decimation_mode_count; i < WEIGHTS_MAX_DECIMATION_MODES; i++) + for (size_t i = decimation_mode_count; i < WEIGHTS_MAX_DECIMATION_MODES; i++) { bsd.decimation_modes[i].maxprec_1plane = -1; bsd.decimation_modes[i].maxprec_2planes = -1; @@ -1078,20 +1078,20 @@ static void construct_block_size_descriptor_3d( // Construct the list of block formats referencing the decimation tables // Clear the list to a known-bad value - for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++) + for (size_t i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++) { bsd.block_mode_packed_index[i] = BLOCK_BAD_BLOCK_MODE; } - unsigned int packed_idx = 0; - unsigned int bm_counts[2] { 0 }; + size_t packed_idx = 0; + size_t bm_counts[2] { 0 }; // Iterate two times to build a usefully ordered list: // - Pass 0 - keep valid single plane block modes // - Pass 1 - keep valid dual plane block modes - for (unsigned int j = 0; j < 2; j++) + for (size_t j = 0; j < 2; j++) { - for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++) + for (size_t i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++) { // Skip modes we've already included in a previous pass if (bsd.block_mode_packed_index[i] != BLOCK_BAD_BLOCK_MODE) @@ -1099,12 +1099,12 @@ static void construct_block_size_descriptor_3d( continue; } - unsigned int x_weights; - unsigned int y_weights; - unsigned int z_weights; + size_t x_weights; + size_t y_weights; + size_t 
z_weights; bool is_dual_plane; - unsigned int quant_mode; - unsigned int weight_bits; + size_t quant_mode; + size_t weight_bits; bool valid = decode_block_mode_3d(i, x_weights, y_weights, z_weights, is_dual_plane, quant_mode, weight_bits); // Skip invalid encodings @@ -1163,11 +1163,11 @@ static void construct_block_size_descriptor_3d( /* See header for documentation. */ void init_block_size_descriptor( - unsigned int x_texels, - unsigned int y_texels, - unsigned int z_texels, + size_t x_texels, + size_t y_texels, + size_t z_texels, bool can_omit_modes, - unsigned int partition_count_cutoff, + size_t partition_count_cutoff, float mode_cutoff, block_size_descriptor& bsd ) { diff --git a/Source/astcenc_compress_symbolic.cpp b/Source/astcenc_compress_symbolic.cpp index 789eac19..8148c70c 100644 --- a/Source/astcenc_compress_symbolic.cpp +++ b/Source/astcenc_compress_symbolic.cpp @@ -37,10 +37,10 @@ static void merge_endpoints( const endpoints& ep_plane1, const endpoints& ep_plane2, - unsigned int component_plane2, + size_t component_plane2, endpoints& result ) { - unsigned int partition_count = ep_plane1.partition_count; + size_t partition_count = ep_plane1.partition_count; assert(partition_count == 1); vmask4 sep_mask = vint4::lane_id() == vint4(component_plane2); @@ -73,15 +73,15 @@ static bool realign_weights_undecimated( symbolic_compressed_block& scb ) { // Get the partition descriptor - unsigned int partition_count = scb.partition_count; + size_t partition_count = scb.partition_count; const auto& pi = bsd.get_partition_info(partition_count, scb.partition_index); // Get the quantization table const block_mode& bm = bsd.get_block_mode(scb.block_mode); - unsigned int weight_quant_level = bm.quant_mode; + size_t weight_quant_level = bm.quant_mode; const quant_and_transfer_table& qat = quant_and_xfer_tables[weight_quant_level]; - unsigned int max_plane = bm.is_dual_plane; + size_t max_plane = bm.is_dual_plane; int plane2_component = scb.plane2_component; vmask4 plane_mask = vint4::lane_id() == vint4(plane2_component); @@ -95,7 +95,7 @@ static bool realign_weights_undecimated( promise(partition_count > 0); - for (unsigned int pa_idx = 0; pa_idx < partition_count; pa_idx++) + for (size_t pa_idx = 0; pa_idx < partition_count; pa_idx++) { unpack_color_endpoints(decode_mode, scb.color_formats[pa_idx], @@ -109,9 +109,9 @@ static bool realign_weights_undecimated( bool adjustments = false; // For each plane and partition ... 
- for (unsigned int pl_idx = 0; pl_idx <= max_plane; pl_idx++) + for (size_t pl_idx = 0; pl_idx <= max_plane; pl_idx++) { - for (unsigned int pa_idx = 0; pa_idx < partition_count; pa_idx++) + for (size_t pa_idx = 0; pa_idx < partition_count; pa_idx++) { // Compute the endpoint delta for all components in current plane vint4 epd = endpnt1[pa_idx] - endpnt0[pa_idx]; @@ -123,7 +123,7 @@ static bool realign_weights_undecimated( // For each weight compute previous, current, and next errors promise(bsd.texel_count > 0); - for (unsigned int texel = 0; texel < bsd.texel_count; texel++) + for (size_t texel = 0; texel < bsd.texel_count; texel++) { int uqw = dec_weights_uquant[texel]; @@ -136,7 +136,7 @@ static bool realign_weights_undecimated( float weight_down = static_cast(uqw_down - uqw); float weight_up = static_cast(uqw_up - uqw); - unsigned int partition = pi.partition_of_texel[texel]; + size_t partition = pi.partition_of_texel[texel]; vfloat4 color_offset = offset[partition]; vfloat4 color_base = endpnt0f[partition]; @@ -192,20 +192,20 @@ static bool realign_weights_decimated( symbolic_compressed_block& scb ) { // Get the partition descriptor - unsigned int partition_count = scb.partition_count; + size_t partition_count = scb.partition_count; const auto& pi = bsd.get_partition_info(partition_count, scb.partition_index); // Get the quantization table const block_mode& bm = bsd.get_block_mode(scb.block_mode); - unsigned int weight_quant_level = bm.quant_mode; + size_t weight_quant_level = bm.quant_mode; const quant_and_transfer_table& qat = quant_and_xfer_tables[weight_quant_level]; // Get the decimation table const decimation_info& di = bsd.get_decimation_info(bm.decimation_mode); - unsigned int weight_count = di.weight_count; + size_t weight_count = di.weight_count; assert(weight_count != bsd.texel_count); - unsigned int max_plane = bm.is_dual_plane; + size_t max_plane = bm.is_dual_plane; int plane2_component = scb.plane2_component; vmask4 plane_mask = vint4::lane_id() == vint4(plane2_component); @@ -220,7 +220,7 @@ static bool realign_weights_decimated( promise(partition_count > 0); promise(weight_count > 0); - for (unsigned int pa_idx = 0; pa_idx < partition_count; pa_idx++) + for (size_t pa_idx = 0; pa_idx < partition_count; pa_idx++) { unpack_color_endpoints(decode_mode, scb.color_formats[pa_idx], @@ -234,9 +234,9 @@ static bool realign_weights_decimated( bool adjustments = false; // For each plane and partition ... 
- for (unsigned int pl_idx = 0; pl_idx <= max_plane; pl_idx++) + for (size_t pl_idx = 0; pl_idx <= max_plane; pl_idx++) { - for (unsigned int pa_idx = 0; pa_idx < partition_count; pa_idx++) + for (size_t pa_idx = 0; pa_idx < partition_count; pa_idx++) { // Compute the endpoint delta for all components in current plane vint4 epd = endpnt1[pa_idx] - endpnt0[pa_idx]; @@ -248,7 +248,7 @@ static bool realign_weights_decimated( // Create an unquantized weight grid for this decimation level ASTCENC_ALIGNAS float uq_weightsf[BLOCK_MAX_WEIGHTS]; - for (unsigned int we_idx = 0; we_idx < weight_count; we_idx += ASTCENC_SIMD_WIDTH) + for (size_t we_idx = 0; we_idx < weight_count; we_idx += ASTCENC_SIMD_WIDTH) { vint unquant_value(dec_weights_uquant + we_idx); vfloat unquant_valuef = int_to_float(unquant_value); @@ -256,7 +256,7 @@ static bool realign_weights_decimated( } // For each weight compute previous, current, and next errors - for (unsigned int we_idx = 0; we_idx < weight_count; we_idx++) + for (size_t we_idx = 0; we_idx < weight_count; we_idx++) { int uqw = dec_weights_uquant[we_idx]; uint32_t prev_and_next = qat.prev_next_values[uqw]; @@ -273,11 +273,11 @@ static bool realign_weights_decimated( vfloat4 error_upv = vfloat4::zero(); // Interpolate the colors to create the diffs - unsigned int texels_to_evaluate = di.weight_texel_count[we_idx]; + size_t texels_to_evaluate = di.weight_texel_count[we_idx]; promise(texels_to_evaluate > 0); - for (unsigned int te_idx = 0; te_idx < texels_to_evaluate; te_idx++) + for (size_t te_idx = 0; te_idx < texels_to_evaluate; te_idx++) { - unsigned int texel = di.weight_texels_tr[te_idx][we_idx]; + size_t texel = di.weight_texels_tr[te_idx][we_idx]; float tw_base = di.texel_contrib_for_weight[te_idx][we_idx]; @@ -293,7 +293,7 @@ static bool realign_weights_decimated( float weight_down = weight_base + uqw_diff_down * tw_base - weight_base; float weight_up = weight_base + uqw_diff_up * tw_base - weight_base; - unsigned int partition = pi.partition_of_texel[texel]; + size_t partition = pi.partition_of_texel[texel]; vfloat4 color_offset = offset[partition]; vfloat4 color_base = endpnt0f[partition]; @@ -356,8 +356,8 @@ static float compress_symbolic_block_for_partition_1plane( const image_block& blk, bool only_always, float tune_errorval_threshold, - unsigned int partition_count, - unsigned int partition_index, + size_t partition_count, + size_t partition_index, symbolic_compressed_block& scb, compression_working_buffers& tmpbuf, int quant_limit @@ -385,10 +385,10 @@ static float compress_symbolic_block_for_partition_1plane( uint8_t* dec_weights_uquant = tmpbuf.dec_weights_uquant; // For each decimation mode, compute an ideal set of weights with no quantization - unsigned int max_decimation_modes = only_always ? bsd.decimation_mode_count_always + size_t max_decimation_modes = only_always ? 
bsd.decimation_mode_count_always : bsd.decimation_mode_count_selected; promise(max_decimation_modes > 0); - for (unsigned int i = 0; i < max_decimation_modes; i++) + for (size_t i = 0; i < max_decimation_modes; i++) { const auto& dm = bsd.get_decimation_mode(i); if (!dm.is_ref_1plane(static_cast(max_weight_quant))) @@ -407,7 +407,7 @@ static float compress_symbolic_block_for_partition_1plane( // Compute maximum colors for the endpoints and ideal weights, then for each endpoint and ideal // weight pair, compute the smallest weight that will result in a color value greater than 1 vfloat4 min_ep(10.0f); - for (unsigned int i = 0; i < partition_count; i++) + for (size_t i = 0; i < partition_count; i++) { vfloat4 ep = (vfloat4(1.0f) - ei.ep.endpt0[i]) / (ei.ep.endpt1[i] - ei.ep.endpt0[i]); @@ -436,10 +436,10 @@ static float compress_symbolic_block_for_partition_1plane( 115 - 4, 111 - 4 - PARTITION_INDEX_BITS, 108 - 4 - PARTITION_INDEX_BITS, 105 - 4 - PARTITION_INDEX_BITS }; - unsigned int max_block_modes = only_always ? bsd.block_mode_count_1plane_always + size_t max_block_modes = only_always ? bsd.block_mode_count_1plane_always : bsd.block_mode_count_1plane_selected; promise(max_block_modes > 0); - for (unsigned int i = 0; i < max_block_modes; i++) + for (size_t i = 0; i < max_block_modes; i++) { const block_mode& bm = bsd.block_modes[i]; @@ -492,7 +492,7 @@ static float compress_symbolic_block_for_partition_1plane( quant_method color_quant_level[TUNE_MAX_TRIAL_CANDIDATES]; quant_method color_quant_level_mod[TUNE_MAX_TRIAL_CANDIDATES]; - unsigned int candidate_count = compute_ideal_endpoint_formats( + size_t candidate_count = compute_ideal_endpoint_formats( pi, blk, ei.ep, qwt_bitcounts, qwt_errors, config.tune_candidate_limit, 0, max_block_modes, partition_format_specifiers, block_mode_index, @@ -502,7 +502,7 @@ static float compress_symbolic_block_for_partition_1plane( float best_errorval_in_mode = ERROR_CALC_DEFAULT; float best_errorval_in_scb = scb.errorval; - for (unsigned int i = 0; i < candidate_count; i++) + for (size_t i = 0; i < candidate_count; i++) { TRACE_NODE(node0, "candidate"); @@ -528,12 +528,12 @@ static float compress_symbolic_block_for_partition_1plane( uint8_t* u8_weight_src = dec_weights_uquant + BLOCK_MAX_WEIGHTS * bm_packed_index; - for (unsigned int j = 0; j < di.weight_count; j++) + for (size_t j = 0; j < di.weight_count; j++) { workscb.weights[j] = u8_weight_src[j]; } - for (unsigned int l = 0; l < config.tune_refinement_limit; l++) + for (size_t l = 0; l < config.tune_refinement_limit; l++) { recompute_ideal_colors_1plane( blk, pi, di, workscb.weights, @@ -541,7 +541,7 @@ static float compress_symbolic_block_for_partition_1plane( // Quantize the chosen color, tracking if worth trying the mod value bool all_same = color_quant_level[i] != color_quant_level_mod[i]; - for (unsigned int j = 0; j < partition_count; j++) + for (size_t j = 0; j < partition_count; j++) { workscb.color_formats[j] = pack_color_endpoints( workep.endpt0[j], @@ -564,7 +564,7 @@ static float compress_symbolic_block_for_partition_1plane( uint8_t colorvals[BLOCK_MAX_PARTITIONS][8]; uint8_t color_formats_mod[BLOCK_MAX_PARTITIONS] { 0 }; bool all_same_mod = true; - for (unsigned int j = 0; j < partition_count; j++) + for (size_t j = 0; j < partition_count; j++) { color_formats_mod[j] = pack_color_endpoints( workep.endpt0[j], @@ -586,9 +586,9 @@ static float compress_symbolic_block_for_partition_1plane( if (all_same_mod) { workscb.color_formats_matched = 1; - for (unsigned int j = 0; j < 
BLOCK_MAX_PARTITIONS; j++) + for (size_t j = 0; j < BLOCK_MAX_PARTITIONS; j++) { - for (unsigned int k = 0; k < 8; k++) + for (size_t k = 0; k < 8; k++) { workscb.color_values[j][k] = colorvals[j][k]; } @@ -623,7 +623,7 @@ static float compress_symbolic_block_for_partition_1plane( // iteration can help more so we give it a extra 8% leeway. Use this knowledge to // drive a heuristic to skip blocks that are unlikely to catch up with the best // block we have already. - unsigned int iters_remaining = config.tune_refinement_limit - l; + size_t iters_remaining = config.tune_refinement_limit - l; float threshold = (0.045f * static_cast(iters_remaining)) + 1.08f; if (errorval > (threshold * best_errorval_in_scb)) { @@ -671,7 +671,7 @@ static float compress_symbolic_block_for_partition_1plane( // Average refinement improvement is 3.5% per iteration, so skip blocks that are // unlikely to catch up with the best block we have already. Assume a 4.5% per step to // give benefit of the doubt ... - unsigned int iters_remaining = config.tune_refinement_limit - 1 - l; + size_t iters_remaining = config.tune_refinement_limit - 1 - l; float threshold = (0.045f * static_cast(iters_remaining)) + 1.0f; if (errorval > (threshold * best_errorval_in_scb)) { @@ -718,7 +718,7 @@ static float compress_symbolic_block_for_partition_2planes( const block_size_descriptor& bsd, const image_block& blk, float tune_errorval_threshold, - unsigned int plane2_component, + size_t plane2_component, symbolic_compressed_block& scb, compression_working_buffers& tmpbuf, int quant_limit @@ -740,7 +740,7 @@ static float compress_symbolic_block_for_partition_2planes( uint8_t* dec_weights_uquant = tmpbuf.dec_weights_uquant; // For each decimation mode, compute an ideal set of weights with no quantization - for (unsigned int i = 0; i < bsd.decimation_mode_count_selected; i++) + for (size_t i = 0; i < bsd.decimation_mode_count_selected; i++) { const auto& dm = bsd.get_decimation_mode(i); if (!dm.is_ref_2plane(static_cast(max_weight_quant))) @@ -801,10 +801,10 @@ static float compress_symbolic_block_for_partition_2planes( int8_t* qwt_bitcounts = tmpbuf.qwt_bitcounts; float* qwt_errors = tmpbuf.qwt_errors; - unsigned int start_2plane = bsd.block_mode_count_1plane_selected; - unsigned int end_2plane = bsd.block_mode_count_1plane_2plane_selected; + size_t start_2plane = bsd.block_mode_count_1plane_selected; + size_t end_2plane = bsd.block_mode_count_1plane_2plane_selected; - for (unsigned int i = start_2plane; i < end_2plane; i++) + for (size_t i = start_2plane; i < end_2plane; i++) { const block_mode& bm = bsd.block_modes[i]; assert(bm.is_dual_plane); @@ -827,7 +827,7 @@ static float compress_symbolic_block_for_partition_2planes( weight_high_value2[i] = 1.0f; } - unsigned int decimation_mode = bm.decimation_mode; + size_t decimation_mode = bm.decimation_mode; const auto& di = bsd.get_decimation_info(decimation_mode); ASTCENC_ALIGNAS float dec_weights_uquantf[BLOCK_MAX_WEIGHTS]; @@ -871,7 +871,7 @@ static float compress_symbolic_block_for_partition_2planes( merge_endpoints(ei1.ep, ei2.ep, plane2_component, epm); const auto& pi = bsd.get_partition_info(1, 0); - unsigned int candidate_count = compute_ideal_endpoint_formats( + size_t candidate_count = compute_ideal_endpoint_formats( pi, blk, epm, qwt_bitcounts, qwt_errors, config.tune_candidate_limit, bsd.block_mode_count_1plane_selected, bsd.block_mode_count_1plane_2plane_selected, @@ -882,7 +882,7 @@ static float compress_symbolic_block_for_partition_2planes( float best_errorval_in_mode = 
ERROR_CALC_DEFAULT; float best_errorval_in_scb = scb.errorval; - for (unsigned int i = 0; i < candidate_count; i++) + for (size_t i = 0; i < candidate_count; i++) { TRACE_NODE(node0, "candidate"); @@ -915,7 +915,7 @@ static float compress_symbolic_block_for_partition_2planes( workscb.weights[j + WEIGHTS_PLANE2_OFFSET] = u8_weight2_src[j]; } - for (unsigned int l = 0; l < config.tune_refinement_limit; l++) + for (size_t l = 0; l < config.tune_refinement_limit; l++) { recompute_ideal_colors_2planes( blk, bsd, di, @@ -957,7 +957,7 @@ static float compress_symbolic_block_for_partition_2planes( // iteration can help more so we give it a extra 8% leeway. Use this knowledge to // drive a heuristic to skip blocks that are unlikely to catch up with the best // block we have already. - unsigned int iters_remaining = config.tune_refinement_limit - l; + size_t iters_remaining = config.tune_refinement_limit - l; float threshold = (0.045f * static_cast(iters_remaining)) + 1.08f; if (errorval > (threshold * best_errorval_in_scb)) { @@ -1006,7 +1006,7 @@ static float compress_symbolic_block_for_partition_2planes( // Average refinement improvement is 3.5% per iteration, so skip blocks that are // unlikely to catch up with the best block we have already. Assume a 4.5% per step to // give benefit of the doubt ... - unsigned int iters_remaining = config.tune_refinement_limit - 1 - l; + size_t iters_remaining = config.tune_refinement_limit - 1 - l; float threshold = (0.045f * static_cast(iters_remaining)) + 1.0f; if (errorval > (threshold * best_errorval_in_scb)) { @@ -1187,13 +1187,13 @@ void compress_block( bool block_skip_two_plane = false; int max_partitions = ctx.config.tune_partition_count_limit; - unsigned int requested_partition_indices[3] { + size_t requested_partition_indices[3] { ctx.config.tune_2partition_index_limit, ctx.config.tune_3partition_index_limit, ctx.config.tune_4partition_index_limit }; - unsigned int requested_partition_trials[3] { + size_t requested_partition_trials[3] { ctx.config.tune_2partitioning_candidate_limit, ctx.config.tune_3partitioning_candidate_limit, ctx.config.tune_4partitioning_candidate_limit @@ -1369,19 +1369,19 @@ void compress_block( // Find best blocks for 2, 3 and 4 partitions for (int partition_count = 2; partition_count <= max_partitions; partition_count++) { - unsigned int partition_indices[TUNE_MAX_PARTITIONING_CANDIDATES]; + size_t partition_indices[TUNE_MAX_PARTITIONING_CANDIDATES]; - unsigned int requested_indices = requested_partition_indices[partition_count - 2]; + size_t requested_indices = requested_partition_indices[partition_count - 2]; - unsigned int requested_trials = requested_partition_trials[partition_count - 2]; + size_t requested_trials = requested_partition_trials[partition_count - 2]; requested_trials = astc::min(requested_trials, requested_indices); - unsigned int actual_trials = find_best_partition_candidates( + size_t actual_trials = find_best_partition_candidates( bsd, blk, partition_count, requested_indices, partition_indices, requested_trials); float best_error_in_prev = best_errorvals_for_pcount[partition_count - 2]; - for (unsigned int i = 0; i < actual_trials; i++) + for (size_t i = 0; i < actual_trials; i++) { TRACE_NODE(node1, "pass"); trace_add_data("partition_count", partition_count); diff --git a/Source/astcenc_compute_variance.cpp b/Source/astcenc_compute_variance.cpp index 48a4af8c..5a3d9ba2 100644 --- a/Source/astcenc_compute_variance.cpp +++ b/Source/astcenc_compute_variance.cpp @@ -420,26 +420,26 @@ void 
compute_pixel_region_variance( } /* See header for documentation. */ -unsigned int init_compute_averages( +size_t init_compute_averages( const astcenc_image& img, - unsigned int alpha_kernel_radius, + size_t alpha_kernel_radius, const astcenc_swizzle& swz, avg_args& ag ) { - unsigned int size_x = img.dim_x; - unsigned int size_y = img.dim_y; - unsigned int size_z = img.dim_z; + size_t size_x = img.dim_x; + size_t size_y = img.dim_y; + size_t size_z = img.dim_z; // Compute maximum block size and from that the working memory buffer size - unsigned int kernel_radius = alpha_kernel_radius; - unsigned int kerneldim = 2 * kernel_radius + 1; + size_t kernel_radius = alpha_kernel_radius; + size_t kerneldim = 2 * kernel_radius + 1; bool have_z = (size_z > 1); - unsigned int max_blk_size_xy = have_z ? 16 : 32; - unsigned int max_blk_size_z = astc::min(size_z, have_z ? 16u : 1u); + size_t max_blk_size_xy = have_z ? 16 : 32; + size_t max_blk_size_z = astc::min(size_z, have_z ? 16_z : 1_z); - unsigned int max_padsize_xy = max_blk_size_xy + kerneldim; - unsigned int max_padsize_z = max_blk_size_z + (have_z ? kerneldim : 0); + size_t max_padsize_xy = max_blk_size_xy + kerneldim; + size_t max_padsize_z = max_blk_size_z + (have_z ? kerneldim : 0); // Perform block-wise averages calculations across the image // Initialize fields which are not populated until later @@ -464,8 +464,8 @@ unsigned int init_compute_averages( ag.work_memory_size = 2 * max_padsize_xy * max_padsize_xy * max_padsize_z; // The parallel task count - unsigned int z_tasks = (size_z + max_blk_size_z - 1) / max_blk_size_z; - unsigned int y_tasks = (size_y + max_blk_size_xy - 1) / max_blk_size_xy; + size_t z_tasks = (size_z + max_blk_size_z - 1) / max_blk_size_z; + size_t y_tasks = (size_y + max_blk_size_xy - 1) / max_blk_size_xy; return z_tasks * y_tasks; } diff --git a/Source/astcenc_decompress_symbolic.cpp b/Source/astcenc_decompress_symbolic.cpp index e7791eef..a54c15e2 100644 --- a/Source/astcenc_decompress_symbolic.cpp +++ b/Source/astcenc_decompress_symbolic.cpp @@ -101,7 +101,7 @@ void unpack_weights( vtable_64x8 table; vtable_prepare(table, scb.weights); - for (unsigned int i = 0; i < bsd.texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < bsd.texel_count; i += ASTCENC_SIMD_WIDTH) { vint summed_value(8); vint weight_count(di.texel_weight_count + i); @@ -130,7 +130,7 @@ void unpack_weights( vtable_32x8 tab_plane2; vtable_prepare(tab_plane2, scb.weights + 32); - for (unsigned int i = 0; i < bsd.texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < bsd.texel_count; i += ASTCENC_SIMD_WIDTH) { vint sum_plane1(8); vint sum_plane2(8); @@ -190,7 +190,7 @@ void decompress_symbolic_block( // If we detected an error-block, blow up immediately. 
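// The task and block counts in init_compute_averages above, and the xblocks/yblocks/zblocks
// counts in the compress/decompress entry points later in this patch, all use the same
// ceiling-division idiom, (size + block - 1) / block. A minimal standalone sketch of that
// idiom, not part of this patch and using a hypothetical helper name:
#include <cstddef>

static inline size_t ceil_div(size_t size, size_t block)
{
	// Round up so a partially covered trailing block still gets its own task
	return (size + block - 1) / block;
}

// Example: ceil_div(67, 32) == 3; the third task covers the 3-texel remainder.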
if (scb.block_type == SYM_BTYPE_ERROR) { - for (unsigned int i = 0; i < bsd.texel_count; i++) + for (size_t i = 0; i < bsd.texel_count; i++) { blk.data_r[i] = error_color_nan(); blk.data_g[i] = error_color_nan(); @@ -243,7 +243,7 @@ void decompress_symbolic_block( } } - for (unsigned int i = 0; i < bsd.texel_count; i++) + for (size_t i = 0; i < bsd.texel_count; i++) { blk.data_r[i] = color.lane<0>(); blk.data_g[i] = color.lane<1>(); @@ -356,8 +356,8 @@ float compute_symbolic_block_difference_2plane( vmask4 u8_mask = get_u8_component_mask(config.profile, blk); // Unpack and compute error for each texel in the partition - unsigned int texel_count = bsd.texel_count; - for (unsigned int i = 0; i < texel_count; i++) + size_t texel_count = bsd.texel_count; + for (size_t i = 0; i < texel_count; i++) { vint4 weight = select(vint4(plane1_weights[i]), vint4(plane2_weights[i]), plane2_mask); vint4 colori = lerp_color_int(u8_mask, ep0, ep1, weight); @@ -423,7 +423,7 @@ float compute_symbolic_block_difference_1plane( assert(scb.block_mode >= 0); // Get the appropriate partition-table entry - unsigned int partition_count = scb.partition_count; + size_t partition_count = scb.partition_count; const auto& pi = bsd.get_partition_info(partition_count, scb.partition_index); // Get the appropriate block descriptor @@ -437,7 +437,7 @@ float compute_symbolic_block_difference_1plane( vmask4 u8_mask = get_u8_component_mask(config.profile, blk); vfloat4 summa = vfloat4::zero(); - for (unsigned int i = 0; i < partition_count; i++) + for (size_t i = 0; i < partition_count; i++) { // Decode the color endpoints for this partition vint4 ep0; @@ -452,10 +452,10 @@ float compute_symbolic_block_difference_1plane( ep0, ep1); // Unpack and compute error for each texel in the partition - unsigned int texel_count = pi.partition_texel_count[i]; - for (unsigned int j = 0; j < texel_count; j++) + size_t texel_count = pi.partition_texel_count[i]; + for (size_t j = 0; j < texel_count; j++) { - unsigned int tix = pi.texels_of_partition[i][j]; + size_t tix = pi.texels_of_partition[i][j]; vint4 colori = lerp_color_int(u8_mask, ep0, ep1, vint4(plane1_weights[tix])); @@ -546,8 +546,8 @@ float compute_symbolic_block_difference_1plane_1partition( vint lane_id = vint::lane_id(); - unsigned int texel_count = bsd.texel_count; - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + size_t texel_count = bsd.texel_count; + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { // Compute EP1 contribution vint weight1 = vint::loada(plane1_weights + i); diff --git a/Source/astcenc_diagnostic_trace.cpp b/Source/astcenc_diagnostic_trace.cpp index bcd6fa72..272ab7f2 100644 --- a/Source/astcenc_diagnostic_trace.cpp +++ b/Source/astcenc_diagnostic_trace.cpp @@ -236,7 +236,7 @@ void trace_add_data( /* See header for documentation. */ void trace_add_data( const char* key, - unsigned int value + size_t value ) { TraceNode* node = g_TraceLog->get_current_leaf(); node->add_attrib("int", key, std::to_string(value)); diff --git a/Source/astcenc_diagnostic_trace.h b/Source/astcenc_diagnostic_trace.h index f5586b0a..c4a0125f 100644 --- a/Source/astcenc_diagnostic_trace.h +++ b/Source/astcenc_diagnostic_trace.h @@ -111,7 +111,7 @@ class TraceNode /** * @brief The number of attributes and child nodes in this node. 
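// The 1-plane difference function above walks each partition's texels through the
// texels_of_partition index list rather than scanning the whole block. A simplified,
// self-contained sketch of that indirection; the struct and function names here are
// illustrative only and are not the codec's real types:
#include <cstddef>
#include <cstdint>

struct partition_info_sketch
{
	size_t partition_count;                  // 1..4 partitions
	size_t partition_texel_count[4];         // texels owned by each partition
	const uint8_t* texels_of_partition[4];   // per-partition texel index lists
};

static float sum_squared_error(
	const partition_info_sketch& pi,
	const float* decoded,    // per-texel decoded values
	const float* original)   // per-texel original values
{
	float summa = 0.0f;
	for (size_t i = 0; i < pi.partition_count; i++)
	{
		for (size_t j = 0; j < pi.partition_texel_count[i]; j++)
		{
			size_t tix = pi.texels_of_partition[i][j];
			float diff = decoded[tix] - original[tix];
			summa += diff * diff;
		}
	}
	return summa;
}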
*/ - unsigned int m_attrib_count { 0 }; + size_t m_attrib_count { 0 }; }; /** @@ -201,12 +201,12 @@ void trace_add_data(const char* key, float value); void trace_add_data(const char* key, int value); /** - * @brief Add an unsigned integer annotation to the current node. + * @brief Add a size_t annotation to the current node. * * @param key The name of the attribute. * @param value The value of the attribute. */ -void trace_add_data(const char* key, unsigned int value); +void trace_add_data(const char* key, size_t value); #else diff --git a/Source/astcenc_entry.cpp b/Source/astcenc_entry.cpp index 4023797a..3af908be 100644 --- a/Source/astcenc_entry.cpp +++ b/Source/astcenc_entry.cpp @@ -39,16 +39,16 @@ struct astcenc_preset_config { float quality; - unsigned int tune_partition_count_limit; - unsigned int tune_2partition_index_limit; - unsigned int tune_3partition_index_limit; - unsigned int tune_4partition_index_limit; - unsigned int tune_block_mode_limit; - unsigned int tune_refinement_limit; - unsigned int tune_candidate_limit; - unsigned int tune_2partitioning_candidate_limit; - unsigned int tune_3partitioning_candidate_limit; - unsigned int tune_4partitioning_candidate_limit; + size_t tune_partition_count_limit; + size_t tune_2partition_index_limit; + size_t tune_3partition_index_limit; + size_t tune_4partition_index_limit; + size_t tune_block_mode_limit; + size_t tune_refinement_limit; + size_t tune_candidate_limit; + size_t tune_2partitioning_candidate_limit; + size_t tune_3partitioning_candidate_limit; + size_t tune_4partitioning_candidate_limit; float tune_db_limit_a_base; float tune_db_limit_b_base; float tune_mse_overshoot; @@ -192,9 +192,9 @@ static astcenc_error validate_profile( * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
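// The _z literal suffix introduced by this patch (1_z, 4_z, 16_z, 100_z in the hunks above
// and below) is not defined in any hunk shown here; it is presumably a user-defined literal
// returning size_t so that astc::min()/astc::clamp() deduce a single argument type. One
// possible definition, purely illustrative:
#include <cstddef>

constexpr size_t operator""_z(unsigned long long value)
{
	return static_cast<size_t>(value);
}

// With this in scope, an expression such as astc::min(size_z, have_z ? 16_z : 1_z)
// compares size_t against size_t instead of mixing size_t and unsigned int.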
*/ static astcenc_error validate_block_size( - unsigned int block_x, - unsigned int block_y, - unsigned int block_z + size_t block_x, + size_t block_y, + size_t block_z ) { // Test if this is a legal block size at all bool is_legal = (((block_z <= 1) && is_legal_2d_block_size(block_x, block_y)) || @@ -224,10 +224,10 @@ static astcenc_error validate_block_size( */ static astcenc_error validate_flags( astcenc_profile profile, - unsigned int flags + size_t flags ) { // Flags field must not contain any unknown flag bits - unsigned int exMask = ~ASTCENC_ALL_FLAGS; + size_t exMask = ~ASTCENC_ALL_FLAGS; if (popcount(flags & exMask) != 0) { return ASTCENC_ERR_BAD_FLAGS; @@ -396,16 +396,16 @@ static astcenc_error validate_config( config.rgbm_m_scale = astc::max(config.rgbm_m_scale, 1.0f); - config.tune_partition_count_limit = astc::clamp(config.tune_partition_count_limit, 1u, 4u); - config.tune_2partition_index_limit = astc::clamp(config.tune_2partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS); - config.tune_3partition_index_limit = astc::clamp(config.tune_3partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS); - config.tune_4partition_index_limit = astc::clamp(config.tune_4partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS); - config.tune_block_mode_limit = astc::clamp(config.tune_block_mode_limit, 1u, 100u); - config.tune_refinement_limit = astc::max(config.tune_refinement_limit, 1u); - config.tune_candidate_limit = astc::clamp(config.tune_candidate_limit, 1u, TUNE_MAX_TRIAL_CANDIDATES); - config.tune_2partitioning_candidate_limit = astc::clamp(config.tune_2partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES); - config.tune_3partitioning_candidate_limit = astc::clamp(config.tune_3partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES); - config.tune_4partitioning_candidate_limit = astc::clamp(config.tune_4partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES); + config.tune_partition_count_limit = astc::clamp(config.tune_partition_count_limit, 1_z, 4_z); + config.tune_2partition_index_limit = astc::clamp(config.tune_2partition_index_limit, 1_z, BLOCK_MAX_PARTITIONINGS); + config.tune_3partition_index_limit = astc::clamp(config.tune_3partition_index_limit, 1_z, BLOCK_MAX_PARTITIONINGS); + config.tune_4partition_index_limit = astc::clamp(config.tune_4partition_index_limit, 1_z, BLOCK_MAX_PARTITIONINGS); + config.tune_block_mode_limit = astc::clamp(config.tune_block_mode_limit, 1_z, 100_z); + config.tune_refinement_limit = astc::max(config.tune_refinement_limit, 1_z); + config.tune_candidate_limit = astc::clamp(config.tune_candidate_limit, 1_z, TUNE_MAX_TRIAL_CANDIDATES); + config.tune_2partitioning_candidate_limit = astc::clamp(config.tune_2partitioning_candidate_limit, 1_z, TUNE_MAX_PARTITIONING_CANDIDATES); + config.tune_3partitioning_candidate_limit = astc::clamp(config.tune_3partitioning_candidate_limit, 1_z, TUNE_MAX_PARTITIONING_CANDIDATES); + config.tune_4partitioning_candidate_limit = astc::clamp(config.tune_4partitioning_candidate_limit, 1_z, TUNE_MAX_PARTITIONING_CANDIDATES); config.tune_db_limit = astc::max(config.tune_db_limit, 0.0f); config.tune_mse_overshoot = astc::max(config.tune_mse_overshoot, 1.0f); config.tune_2partition_early_out_limit_factor = astc::max(config.tune_2partition_early_out_limit_factor, 0.0f); @@ -435,11 +435,11 @@ static astcenc_error validate_config( /* See header for documentation. 
*/ astcenc_error astcenc_config_init( astcenc_profile profile, - unsigned int block_x, - unsigned int block_y, - unsigned int block_z, + size_t block_x, + size_t block_y, + size_t block_z, float quality, - unsigned int flags, + size_t flags, astcenc_config* configp ) { astcenc_error status; @@ -455,7 +455,7 @@ astcenc_error astcenc_config_init( std::memset(&config, 0, sizeof(config)); // Process the block size - block_z = astc::max(block_z, 1u); // For 2D blocks Z==0 is accepted, but convert to 1 + block_z = astc::max(block_z, 1_z); // For 2D blocks Z==0 is accepted, but convert to 1 status = validate_block_size(block_x, block_y, block_z); if (status != ASTCENC_SUCCESS) { @@ -546,7 +546,7 @@ astcenc_error astcenc_config_init( #define LERPI(param) astc::flt2int_rtn(\ (static_cast(node_a.param) * wt_node_a) + \ (static_cast(node_b.param) * wt_node_b)) - #define LERPUI(param) static_cast(LERPI(param)) + #define LERPUI(param) static_cast(LERPI(param)) config.tune_partition_count_limit = LERPI(tune_partition_count_limit); config.tune_2partition_index_limit = LERPI(tune_2partition_index_limit); @@ -612,7 +612,7 @@ astcenc_error astcenc_config_init( // Normal map encoding uses L+A blocks, so allow one more partitioning // than normal. We need need fewer bits for endpoints, so more likely // to be able to use more partitions than an RGB/RGBA block - config.tune_partition_count_limit = astc::min(config.tune_partition_count_limit + 1u, 4u); + config.tune_partition_count_limit = astc::min(config.tune_partition_count_limit + 1_z, 4_z); config.cw_g_weight = 0.0f; config.cw_b_weight = 0.0f; @@ -656,7 +656,7 @@ astcenc_error astcenc_config_init( /* See header for documentation. */ astcenc_error astcenc_context_alloc( const astcenc_config* configp, - unsigned int thread_count, + size_t thread_count, astcenc_context** context ) { astcenc_error status; @@ -790,7 +790,7 @@ void astcenc_context_free( */ static void compress_image( astcenc_context& ctxo, - unsigned int thread_index, + size_t thread_index, const astcenc_image& image, const astcenc_swizzle& swizzle, uint8_t* buffer @@ -851,18 +851,18 @@ static void compress_image( // All threads run this processing loop until there is no work remaining while (true) { - unsigned int count; - unsigned int base = ctxo.manage_compress.get_task_assignment(16, count); + size_t count; + size_t base = ctxo.manage_compress.get_task_assignment(16, count); if (!count) { break; } - for (unsigned int i = base; i < base + count; i++) + for (size_t i = base; i < base + count; i++) { // Decode i into x, y, z block indices int z = i / plane_blocks; - unsigned int rem = i - (z * plane_blocks); + size_t rem = i - (z * plane_blocks); int y = rem / row_blocks; int x = rem - (y * row_blocks); @@ -969,14 +969,14 @@ static void compute_averages( // All threads run this processing loop until there is no work remaining while (true) { - unsigned int count; - unsigned int base = ctx.manage_avg.get_task_assignment(16, count); + size_t count; + size_t base = ctx.manage_avg.get_task_assignment(16, count); if (!count) { break; } - for (unsigned int i = base; i < base + count; i++) + for (size_t i = base; i < base + count; i++) { int z = (i / (y_tasks)) * step_z; int y = (i - (z * y_tasks)) * step_xy; @@ -1010,7 +1010,7 @@ astcenc_error astcenc_compress_image( const astcenc_swizzle* swizzle, uint8_t* data_out, size_t data_len, - unsigned int thread_index + size_t thread_index ) { #if defined(ASTCENC_DECOMPRESS_ONLY) (void)ctxo; @@ -1041,13 +1041,13 @@ astcenc_error astcenc_compress_image( 
return ASTCENC_ERR_BAD_PARAM; } - unsigned int block_x = ctx->config.block_x; - unsigned int block_y = ctx->config.block_y; - unsigned int block_z = ctx->config.block_z; + size_t block_x = ctx->config.block_x; + size_t block_y = ctx->config.block_y; + size_t block_z = ctx->config.block_z; - unsigned int xblocks = (image.dim_x + block_x - 1) / block_x; - unsigned int yblocks = (image.dim_y + block_y - 1) / block_y; - unsigned int zblocks = (image.dim_z + block_z - 1) / block_z; + size_t xblocks = (image.dim_x + block_x - 1) / block_x; + size_t yblocks = (image.dim_y + block_y - 1) / block_y; + size_t zblocks = (image.dim_z + block_z - 1) / block_z; // Check we have enough output space (16 bytes per block) size_t size_needed = xblocks * yblocks * zblocks * 16; @@ -1153,7 +1153,7 @@ astcenc_error astcenc_decompress_image( size_t data_len, astcenc_image* image_outp, const astcenc_swizzle* swizzle, - unsigned int thread_index + size_t thread_index ) { astcenc_error status; astcenc_image& image_out = *image_outp; @@ -1171,14 +1171,14 @@ astcenc_error astcenc_decompress_image( return status; } - unsigned int block_x = ctx->config.block_x; - unsigned int block_y = ctx->config.block_y; - unsigned int block_z = ctx->config.block_z; + size_t block_x = ctx->config.block_x; + size_t block_y = ctx->config.block_y; + size_t block_z = ctx->config.block_z; - unsigned int xblocks = (image_out.dim_x + block_x - 1) / block_x; - unsigned int yblocks = (image_out.dim_y + block_y - 1) / block_y; - unsigned int zblocks = (image_out.dim_z + block_z - 1) / block_z; - unsigned int block_count = zblocks * yblocks * xblocks; + size_t xblocks = (image_out.dim_x + block_x - 1) / block_x; + size_t yblocks = (image_out.dim_y + block_y - 1) / block_y; + size_t zblocks = (image_out.dim_z + block_z - 1) / block_z; + size_t block_count = zblocks * yblocks * xblocks; int row_blocks = xblocks; int plane_blocks = xblocks * yblocks; @@ -1208,22 +1208,22 @@ astcenc_error astcenc_decompress_image( // All threads run this processing loop until there is no work remaining while (true) { - unsigned int count; - unsigned int base = ctxo->manage_decompress.get_task_assignment(128, count); + size_t count; + size_t base = ctxo->manage_decompress.get_task_assignment(128, count); if (!count) { break; } - for (unsigned int i = base; i < base + count; i++) + for (size_t i = base; i < base + count; i++) { // Decode i into x, y, z block indices int z = i / plane_blocks; - unsigned int rem = i - (z * plane_blocks); + size_t rem = i - (z * plane_blocks); int y = rem / row_blocks; int x = rem - (y * row_blocks); - unsigned int offset = (((z * yblocks + y) * xblocks) + x) * 16; + size_t offset = (((z * yblocks + y) * xblocks) + x) * 16; const uint8_t* bp = data + offset; symbolic_compressed_block scb; @@ -1320,7 +1320,7 @@ astcenc_error astcenc_get_block_info( info->weight_level_count = get_quant_level(bm.get_weight_quant_mode()); // Unpack color endpoints for each active partition - for (unsigned int i = 0; i < scb.partition_count; i++) + for (size_t i = 0; i < scb.partition_count; i++) { bool rgb_hdr; bool a_hdr; @@ -1352,7 +1352,7 @@ astcenc_error astcenc_get_block_info( int weight_plane2[BLOCK_MAX_TEXELS]; unpack_weights(bsd, scb, di, bm.is_dual_plane, weight_plane1, weight_plane2); - for (unsigned int i = 0; i < bsd.texel_count; i++) + for (size_t i = 0; i < bsd.texel_count; i++) { info->weight_values_plane1[i] = static_cast(weight_plane1[i]) * (1.0f / WEIGHTS_TEXEL_SUM); if (info->is_dual_plane_block) @@ -1362,7 +1362,7 @@ astcenc_error 
astcenc_get_block_info( } // Unpack partition assignments for each texel - for (unsigned int i = 0; i < bsd.texel_count; i++) + for (size_t i = 0; i < bsd.texel_count; i++) { info->partition_assignment[i] = pi.partition_of_texel[i]; } diff --git a/Source/astcenc_find_best_partitioning.cpp b/Source/astcenc_find_best_partitioning.cpp index f2e43282..da4eb9fb 100644 --- a/Source/astcenc_find_best_partitioning.cpp +++ b/Source/astcenc_find_best_partitioning.cpp @@ -59,25 +59,25 @@ */ static void kmeans_init( const image_block& blk, - unsigned int texel_count, - unsigned int partition_count, + size_t texel_count, + size_t partition_count, vfloat4 cluster_centers[BLOCK_MAX_PARTITIONS] ) { promise(texel_count > 0); promise(partition_count > 0); - unsigned int clusters_selected = 0; + size_t clusters_selected = 0; float distances[BLOCK_MAX_TEXELS]; // Pick a random sample as first cluster center; 145897 from random.org - unsigned int sample = 145897 % texel_count; + size_t sample = 145897 % texel_count; vfloat4 center_color = blk.texel(sample); cluster_centers[clusters_selected] = center_color; clusters_selected++; // Compute the distance to the first cluster center float distance_sum = 0.0f; - for (unsigned int i = 0; i < texel_count; i++) + for (size_t i = 0; i < texel_count; i++) { vfloat4 color = blk.texel(i); vfloat4 diff = color - center_color; @@ -93,7 +93,7 @@ static void kmeans_init( 0.347661f, 0.731960f, 0.156391f }; - unsigned int cutoff = (clusters_selected - 1) + 3 * (partition_count - 2); + size_t cutoff = (clusters_selected - 1) + 3 * (partition_count - 2); // Pick the remaining samples as needed while (true) @@ -122,7 +122,7 @@ static void kmeans_init( // Compute the distance to the new cluster center, keep the min dist distance_sum = 0.0f; - for (unsigned int i = 0; i < texel_count; i++) + for (size_t i = 0; i < texel_count; i++) { vfloat4 color = blk.texel(i); vfloat4 diff = color - center_color; @@ -145,8 +145,8 @@ static void kmeans_init( */ static void kmeans_assign( const image_block& blk, - unsigned int texel_count, - unsigned int partition_count, + size_t texel_count, + size_t partition_count, const vfloat4 cluster_centers[BLOCK_MAX_PARTITIONS], uint8_t partition_of_texel[BLOCK_MAX_TEXELS] ) { @@ -156,13 +156,13 @@ static void kmeans_assign( uint8_t partition_texel_count[BLOCK_MAX_PARTITIONS] { 0 }; // Find the best partition for every texel - for (unsigned int i = 0; i < texel_count; i++) + for (size_t i = 0; i < texel_count; i++) { float best_distance = std::numeric_limits::max(); - unsigned int best_partition = 0; + size_t best_partition = 0; vfloat4 color = blk.texel(i); - for (unsigned int j = 0; j < partition_count; j++) + for (size_t j = 0; j < partition_count; j++) { vfloat4 diff = color - cluster_centers[j]; float distance = dot_s(diff * diff, blk.channel_weight); @@ -185,7 +185,7 @@ static void kmeans_assign( do { problem_case = false; - for (unsigned int i = 0; i < partition_count; i++) + for (size_t i = 0; i < partition_count; i++) { if (partition_texel_count[i] == 0) { @@ -209,8 +209,8 @@ static void kmeans_assign( */ static void kmeans_update( const image_block& blk, - unsigned int texel_count, - unsigned int partition_count, + size_t texel_count, + size_t partition_count, vfloat4 cluster_centers[BLOCK_MAX_PARTITIONS], const uint8_t partition_of_texel[BLOCK_MAX_TEXELS] ) { @@ -227,7 +227,7 @@ static void kmeans_update( uint8_t partition_texel_count[BLOCK_MAX_PARTITIONS] { 0 }; // Find the center-of-gravity in each cluster - for (unsigned int i = 0; i < 
texel_count; i++) + for (size_t i = 0; i < texel_count; i++) { uint8_t partition = partition_of_texel[i]; color_sum[partition] += blk.texel(i); @@ -235,7 +235,7 @@ static void kmeans_update( } // Set the center of gravity to be the new cluster center - for (unsigned int i = 0; i < partition_count; i++) + for (size_t i = 0; i < partition_count; i++) { float scale = 1.0f / static_cast(partition_texel_count[i]); cluster_centers[i] = color_sum[i] * scale; @@ -352,7 +352,7 @@ static inline uint8_t partition_mismatch4( return static_cast(astc::min(v0, v1, v2, v3) / 2); } -using mismatch_dispatch = unsigned int (*)(const uint64_t*, const uint64_t*); +using mismatch_dispatch = size_t (*)(const uint64_t*, const uint64_t*); /** * @brief Count the partition table mismatches vs the data clustering. @@ -364,16 +364,16 @@ using mismatch_dispatch = unsigned int (*)(const uint64_t*, const uint64_t*); */ static void count_partition_mismatch_bits( const block_size_descriptor& bsd, - unsigned int partition_count, + size_t partition_count, const uint64_t bitmaps[BLOCK_MAX_PARTITIONS], uint8_t mismatch_counts[BLOCK_MAX_PARTITIONINGS] ) { - unsigned int active_count = bsd.partitioning_count_selected[partition_count - 1]; + size_t active_count = bsd.partitioning_count_selected[partition_count - 1]; promise(active_count > 0); if (partition_count == 2) { - for (unsigned int i = 0; i < active_count; i++) + for (size_t i = 0; i < active_count; i++) { mismatch_counts[i] = partition_mismatch2(bitmaps, bsd.coverage_bitmaps_2[i]); assert(mismatch_counts[i] < BLOCK_MAX_KMEANS_TEXELS); @@ -382,7 +382,7 @@ static void count_partition_mismatch_bits( } else if (partition_count == 3) { - for (unsigned int i = 0; i < active_count; i++) + for (size_t i = 0; i < active_count; i++) { mismatch_counts[i] = partition_mismatch3(bitmaps, bsd.coverage_bitmaps_3[i]); assert(mismatch_counts[i] < BLOCK_MAX_KMEANS_TEXELS); @@ -391,7 +391,7 @@ static void count_partition_mismatch_bits( } else { - for (unsigned int i = 0; i < active_count; i++) + for (size_t i = 0; i < active_count; i++) { mismatch_counts[i] = partition_mismatch4(bitmaps, bsd.coverage_bitmaps_4[i]); assert(mismatch_counts[i] < BLOCK_MAX_KMEANS_TEXELS); @@ -409,9 +409,9 @@ static void count_partition_mismatch_bits( * * @return The number of active partitions in this selection. */ -static unsigned int get_partition_ordering_by_mismatch_bits( - unsigned int texel_count, - unsigned int partitioning_count, +static size_t get_partition_ordering_by_mismatch_bits( + size_t texel_count, + size_t partitioning_count, const uint8_t mismatch_count[BLOCK_MAX_PARTITIONINGS], uint16_t partition_ordering[BLOCK_MAX_PARTITIONINGS] ) { @@ -419,7 +419,7 @@ static unsigned int get_partition_ordering_by_mismatch_bits( uint16_t mscount[BLOCK_MAX_KMEANS_TEXELS] { 0 }; // Create the histogram of mismatch counts - for (unsigned int i = 0; i < partitioning_count; i++) + for (size_t i = 0; i < partitioning_count; i++) { mscount[mismatch_count[i]]++; } @@ -427,7 +427,7 @@ static unsigned int get_partition_ordering_by_mismatch_bits( // Create a running sum from the histogram array // Indices store previous values only; i.e. 
exclude self after sum uint16_t sum = 0; - for (unsigned int i = 0; i < texel_count; i++) + for (size_t i = 0; i < texel_count; i++) { uint16_t cnt = mscount[i]; mscount[i] = sum; @@ -436,9 +436,9 @@ static unsigned int get_partition_ordering_by_mismatch_bits( // Use the running sum as the index, incrementing after read to allow // sequential entries with the same count - for (unsigned int i = 0; i < partitioning_count; i++) + for (size_t i = 0; i < partitioning_count; i++) { - unsigned int idx = mscount[mismatch_count[i]]++; + size_t idx = mscount[mismatch_count[i]]++; partition_ordering[idx] = static_cast(i); } @@ -455,17 +455,17 @@ static unsigned int get_partition_ordering_by_mismatch_bits( * * @return The number of active partitionings in this selection. */ -static unsigned int compute_kmeans_partition_ordering( +static size_t compute_kmeans_partition_ordering( const block_size_descriptor& bsd, const image_block& blk, - unsigned int partition_count, + size_t partition_count, uint16_t partition_ordering[BLOCK_MAX_PARTITIONINGS] ) { vfloat4 cluster_centers[BLOCK_MAX_PARTITIONS]; uint8_t texel_partitions[BLOCK_MAX_TEXELS]; // Use three passes of k-means clustering to partition the block data - for (unsigned int i = 0; i < 3; i++) + for (size_t i = 0; i < 3; i++) { if (i == 0) { @@ -481,11 +481,11 @@ static unsigned int compute_kmeans_partition_ordering( // Construct the block bitmaps of texel assignments to each partition uint64_t bitmaps[BLOCK_MAX_PARTITIONS] { 0 }; - unsigned int texels_to_process = astc::min(bsd.texel_count, BLOCK_MAX_KMEANS_TEXELS); + size_t texels_to_process = astc::min(bsd.texel_count, BLOCK_MAX_KMEANS_TEXELS); promise(texels_to_process > 0); - for (unsigned int i = 0; i < texels_to_process; i++) + for (size_t i = 0; i < texels_to_process; i++) { - unsigned int idx = bsd.kmeans_texels[i]; + size_t idx = bsd.kmeans_texels[i]; bitmaps[texel_partitions[idx]] |= 1ULL << i; } @@ -510,11 +510,11 @@ static unsigned int compute_kmeans_partition_ordering( * @param[out] best_partitions The array of best partition values. */ static void insert_result( - unsigned int max_values, + size_t max_values, float this_error, - unsigned int this_partition, + size_t this_partition, float* best_errors, - unsigned int* best_partitions) + size_t* best_partitions) { promise(max_values > 0); @@ -525,7 +525,7 @@ static void insert_result( } // Else insert into the list in error-order - for (unsigned int i = 0; i < max_values; i++) + for (size_t i = 0; i < max_values; i++) { // Existing result is better - move on ... if (this_error > best_errors[i]) @@ -534,7 +534,7 @@ static void insert_result( } // Move existing results down one - for (unsigned int j = max_values - 1; j > i; j--) + for (size_t j = max_values - 1; j > i; j--) { best_errors[j] = best_errors[j - 1]; best_partitions[j] = best_partitions[j - 1]; @@ -548,17 +548,17 @@ static void insert_result( } /* See header for documentation. */ -unsigned int find_best_partition_candidates( +size_t find_best_partition_candidates( const block_size_descriptor& bsd, const image_block& blk, - unsigned int partition_count, - unsigned int partition_search_limit, - unsigned int best_partitions[TUNE_MAX_PARTITIONING_CANDIDATES], - unsigned int requested_candidates + size_t partition_count, + size_t partition_search_limit, + size_t best_partitions[TUNE_MAX_PARTITIONING_CANDIDATES], + size_t requested_candidates ) { // Constant used to estimate quantization error for a given partitioning; the optimal value for // this depends on bitrate. 
These values have been determined empirically. - unsigned int texels_per_block = bsd.texel_count; + size_t texels_per_block = bsd.texel_count; float weight_imprecision_estim = 0.055f; if (texels_per_block <= 20) { @@ -579,7 +579,7 @@ unsigned int find_best_partition_candidates( weight_imprecision_estim = weight_imprecision_estim * weight_imprecision_estim; uint16_t partition_sequence[BLOCK_MAX_PARTITIONINGS]; - unsigned int sequence_len = compute_kmeans_partition_ordering(bsd, blk, partition_count, partition_sequence); + size_t sequence_len = compute_kmeans_partition_ordering(bsd, blk, partition_count, partition_sequence); partition_search_limit = astc::min(partition_search_limit, sequence_len); requested_candidates = astc::min(partition_search_limit, requested_candidates); @@ -587,13 +587,13 @@ unsigned int find_best_partition_candidates( // Partitioning errors assuming uncorrelated-chrominance endpoints float uncor_best_errors[TUNE_MAX_PARTITIONING_CANDIDATES]; - unsigned int uncor_best_partitions[TUNE_MAX_PARTITIONING_CANDIDATES]; + size_t uncor_best_partitions[TUNE_MAX_PARTITIONING_CANDIDATES]; // Partitioning errors assuming same-chrominance endpoints float samec_best_errors[TUNE_MAX_PARTITIONING_CANDIDATES]; - unsigned int samec_best_partitions[TUNE_MAX_PARTITIONING_CANDIDATES]; + size_t samec_best_partitions[TUNE_MAX_PARTITIONING_CANDIDATES]; - for (unsigned int i = 0; i < requested_candidates; i++) + for (size_t i = 0; i < requested_candidates; i++) { uncor_best_errors[i] = ERROR_CALC_DEFAULT; samec_best_errors[i] = ERROR_CALC_DEFAULT; @@ -601,9 +601,9 @@ unsigned int find_best_partition_candidates( if (uses_alpha) { - for (unsigned int i = 0; i < partition_search_limit; i++) + for (size_t i = 0; i < partition_search_limit; i++) { - unsigned int partition = partition_sequence[i]; + size_t partition = partition_sequence[i]; const auto& pi = bsd.get_raw_partition_info(partition_count, partition); // Compute weighting to give to each component in each partition @@ -619,7 +619,7 @@ unsigned int find_best_partition_candidates( float line_lengths[BLOCK_MAX_PARTITIONS]; - for (unsigned int j = 0; j < partition_count; j++) + for (size_t j = 0; j < partition_count; j++) { partition_metrics& pm = pms[j]; @@ -657,7 +657,7 @@ unsigned int find_best_partition_candidates( // 4(optimized): square the vector once, then do a dot-product with the average // texel error, then multiply by the number of texels. - for (unsigned int j = 0; j < partition_count; j++) + for (size_t j = 0; j < partition_count; j++) { float tpp = static_cast(pi.partition_texel_count[j]); vfloat4 error_weights(tpp * weight_imprecision_estim); @@ -675,9 +675,9 @@ unsigned int find_best_partition_candidates( } else { - for (unsigned int i = 0; i < partition_search_limit; i++) + for (size_t i = 0; i < partition_search_limit; i++) { - unsigned int partition = partition_sequence[i]; + size_t partition = partition_sequence[i]; const auto& pi = bsd.get_raw_partition_info(partition_count, partition); // Compute weighting to give to each component in each partition @@ -686,7 +686,7 @@ unsigned int find_best_partition_candidates( partition_lines3 plines[BLOCK_MAX_PARTITIONS]; - for (unsigned int j = 0; j < partition_count; j++) + for (size_t j = 0; j < partition_count; j++) { partition_metrics& pm = pms[j]; partition_lines3& pl = plines[j]; @@ -723,7 +723,7 @@ unsigned int find_best_partition_candidates( // 4(optimized): square the vector once, then do a dot-product with the average // texel error, then multiply by the number of texels. 
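// get_partition_ordering_by_mismatch_bits above is a counting sort: histogram the mismatch
// counts, convert the histogram to an exclusive prefix sum, then scatter the candidate
// indices so the fewest-mismatch partitionings come first. A standalone sketch of the same
// scheme, assuming every mismatch value is below 64 (matching the BLOCK_MAX_KMEANS_TEXELS
// asserts in the original); names are illustrative only:
#include <cstddef>
#include <cstdint>

static void order_by_mismatch(
	size_t count,             // number of candidate partitionings
	const uint8_t* mismatch,  // mismatch count per candidate, each < 64
	uint16_t* ordering)       // output: candidate indices, fewest mismatches first
{
	uint16_t histogram[64] { 0 };
	for (size_t i = 0; i < count; i++)
	{
		histogram[mismatch[i]]++;
	}

	// Exclusive prefix sum: histogram[v] becomes the first output slot for value v
	uint16_t sum = 0;
	for (size_t i = 0; i < 64; i++)
	{
		uint16_t cnt = histogram[i];
		histogram[i] = sum;
		sum += cnt;
	}

	// Stable scatter: candidates with equal mismatch counts keep their relative order
	for (size_t i = 0; i < count; i++)
	{
		ordering[histogram[mismatch[i]]++] = static_cast<uint16_t>(i);
	}
}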
- for (unsigned int j = 0; j < partition_count; j++) + for (size_t j = 0; j < partition_count; j++) { partition_lines3& pl = plines[j]; @@ -742,23 +742,23 @@ unsigned int find_best_partition_candidates( } } - unsigned int interleave[2 * TUNE_MAX_PARTITIONING_CANDIDATES]; - for (unsigned int i = 0; i < requested_candidates; i++) + size_t interleave[2 * TUNE_MAX_PARTITIONING_CANDIDATES]; + for (size_t i = 0; i < requested_candidates; i++) { interleave[2 * i] = bsd.get_raw_partition_info(partition_count, uncor_best_partitions[i]).partition_index; interleave[2 * i + 1] = bsd.get_raw_partition_info(partition_count, samec_best_partitions[i]).partition_index; } uint64_t bitmasks[1024/64] { 0 }; - unsigned int emitted = 0; + size_t emitted = 0; // Deduplicate the first "requested" entries - for (unsigned int i = 0; i < requested_candidates * 2; i++) + for (size_t i = 0; i < requested_candidates * 2; i++) { - unsigned int partition = interleave[i]; + size_t partition = interleave[i]; - unsigned int word = partition / 64; - unsigned int bit = partition % 64; + size_t word = partition / 64; + size_t bit = partition % 64; bool written = bitmasks[word] & (1ull << bit); diff --git a/Source/astcenc_ideal_endpoints_and_weights.cpp b/Source/astcenc_ideal_endpoints_and_weights.cpp index 8e6ee2f4..fa3347f6 100644 --- a/Source/astcenc_ideal_endpoints_and_weights.cpp +++ b/Source/astcenc_ideal_endpoints_and_weights.cpp @@ -38,7 +38,7 @@ static vfloat bilinear_infill_vla( const decimation_info& di, const float* weights, - unsigned int index + size_t index ) { // Load the bilinear filter texel weight indexes in the decimated grid const uint8_t* weight_idx0 = di.texel_weights_tr[0] + index; @@ -78,7 +78,7 @@ static vfloat bilinear_infill_vla( static vfloat bilinear_infill_vla_2( const decimation_info& di, const float* weights, - unsigned int index + size_t index ) { // Load the bilinear filter texel weight indexes in the decimated grid const uint8_t* weight_idx0 = di.texel_weights_tr[0] + index; @@ -108,13 +108,13 @@ static void compute_ideal_colors_and_weights_1_comp( const image_block& blk, const partition_info& pi, endpoints_and_weights& ei, - unsigned int component + size_t component ) { - unsigned int partition_count = pi.partition_count; + size_t partition_count = pi.partition_count; ei.ep.partition_count = partition_count; promise(partition_count > 0); - unsigned int texel_count = blk.texel_count; + size_t texel_count = blk.texel_count; promise(texel_count > 0); float error_weight; @@ -146,15 +146,15 @@ static void compute_ideal_colors_and_weights_1_comp( bool is_constant_wes { true }; float partition0_len_sq { 0.0f }; - for (unsigned int i = 0; i < partition_count; i++) + for (size_t i = 0; i < partition_count; i++) { float lowvalue { 1e10f }; float highvalue { -1e10f }; - unsigned int partition_texel_count = pi.partition_texel_count[i]; - for (unsigned int j = 0; j < partition_texel_count; j++) + size_t partition_texel_count = pi.partition_texel_count[i]; + for (size_t j = 0; j < partition_texel_count; j++) { - unsigned int tix = pi.texels_of_partition[i][j]; + size_t tix = pi.texels_of_partition[i][j]; float value = data_vr[tix]; lowvalue = astc::min(value, lowvalue); highvalue = astc::max(value, highvalue); @@ -179,9 +179,9 @@ static void compute_ideal_colors_and_weights_1_comp( is_constant_wes = is_constant_wes && length_squared == partition0_len_sq; } - for (unsigned int j = 0; j < partition_texel_count; j++) + for (size_t j = 0; j < partition_texel_count; j++) { - unsigned int tix = 
pi.texels_of_partition[i][j]; + size_t tix = pi.texels_of_partition[i][j]; float value = (data_vr[tix] - lowvalue) * scale; value = astc::clamp1f(value); @@ -195,8 +195,8 @@ static void compute_ideal_colors_and_weights_1_comp( } // Zero initialize any SIMD over-fetch - unsigned int texel_count_simd = round_up_to_simd_multiple_vla(texel_count); - for (unsigned int i = texel_count; i < texel_count_simd; i++) + size_t texel_count_simd = round_up_to_simd_multiple_vla(texel_count); + for (size_t i = texel_count; i < texel_count_simd; i++) { ei.weights[i] = 0.0f; ei.weight_error_scale[i] = 0.0f; @@ -221,11 +221,11 @@ static void compute_ideal_colors_and_weights_2_comp( int component1, int component2 ) { - unsigned int partition_count = pi.partition_count; + size_t partition_count = pi.partition_count; ei.ep.partition_count = partition_count; promise(partition_count > 0); - unsigned int texel_count = blk.texel_count; + size_t texel_count = blk.texel_count; promise(texel_count > 0); partition_metrics pms[BLOCK_MAX_PARTITIONS]; @@ -266,7 +266,7 @@ static void compute_ideal_colors_and_weights_2_comp( vmask4 comp1_mask = vint4::lane_id() == vint4(component1); vmask4 comp2_mask = vint4::lane_id() == vint4(component2); - for (unsigned int i = 0; i < partition_count; i++) + for (size_t i = 0; i < partition_count; i++) { vfloat4 dir = pms[i].dir; if (hadd_s(dir) < 0.0f) @@ -278,10 +278,10 @@ static void compute_ideal_colors_and_weights_2_comp( float lowparam { 1e10f }; float highparam { -1e10f }; - unsigned int partition_texel_count = pi.partition_texel_count[i]; - for (unsigned int j = 0; j < partition_texel_count; j++) + size_t partition_texel_count = pi.partition_texel_count[i]; + for (size_t j = 0; j < partition_texel_count; j++) { - unsigned int tix = pi.texels_of_partition[i][j]; + size_t tix = pi.texels_of_partition[i][j]; vfloat4 point = vfloat2(data_vr[tix], data_vg[tix]); float param = dot_s(point - line.a, line.b); ei.weights[tix] = param; @@ -311,9 +311,9 @@ static void compute_ideal_colors_and_weights_2_comp( is_constant_wes = is_constant_wes && length_squared == partition0_len_sq; } - for (unsigned int j = 0; j < partition_texel_count; j++) + for (size_t j = 0; j < partition_texel_count; j++) { - unsigned int tix = pi.texels_of_partition[i][j]; + size_t tix = pi.texels_of_partition[i][j]; float idx = (ei.weights[tix] - lowparam) * scale; idx = astc::clamp1f(idx); @@ -333,8 +333,8 @@ static void compute_ideal_colors_and_weights_2_comp( } // Zero initialize any SIMD over-fetch - unsigned int texel_count_simd = round_up_to_simd_multiple_vla(texel_count); - for (unsigned int i = texel_count; i < texel_count_simd; i++) + size_t texel_count_simd = round_up_to_simd_multiple_vla(texel_count); + for (size_t i = texel_count; i < texel_count_simd; i++) { ei.weights[i] = 0.0f; ei.weight_error_scale[i] = 0.0f; @@ -355,13 +355,13 @@ static void compute_ideal_colors_and_weights_3_comp( const image_block& blk, const partition_info& pi, endpoints_and_weights& ei, - unsigned int omitted_component + size_t omitted_component ) { - unsigned int partition_count = pi.partition_count; + size_t partition_count = pi.partition_count; ei.ep.partition_count = partition_count; promise(partition_count > 0); - unsigned int texel_count = blk.texel_count; + size_t texel_count = blk.texel_count; promise(texel_count > 0); partition_metrics pms[BLOCK_MAX_PARTITIONS]; @@ -415,7 +415,7 @@ static void compute_ideal_colors_and_weights_3_comp( bool is_constant_wes { true }; float partition0_len_sq { 0.0f }; - for (unsigned int i = 
0; i < partition_count; i++) + for (size_t i = 0; i < partition_count; i++) { vfloat4 dir = pms[i].dir; if (hadd_rgb_s(dir) < 0.0f) @@ -427,10 +427,10 @@ static void compute_ideal_colors_and_weights_3_comp( float lowparam { 1e10f }; float highparam { -1e10f }; - unsigned int partition_texel_count = pi.partition_texel_count[i]; - for (unsigned int j = 0; j < partition_texel_count; j++) + size_t partition_texel_count = pi.partition_texel_count[i]; + for (size_t j = 0; j < partition_texel_count; j++) { - unsigned int tix = pi.texels_of_partition[i][j]; + size_t tix = pi.texels_of_partition[i][j]; vfloat4 point = vfloat3(data_vr[tix], data_vg[tix], data_vb[tix]); float param = dot3_s(point - line.a, line.b); ei.weights[tix] = param; @@ -460,9 +460,9 @@ static void compute_ideal_colors_and_weights_3_comp( is_constant_wes = is_constant_wes && length_squared == partition0_len_sq; } - for (unsigned int j = 0; j < partition_texel_count; j++) + for (size_t j = 0; j < partition_texel_count; j++) { - unsigned int tix = pi.texels_of_partition[i][j]; + size_t tix = pi.texels_of_partition[i][j]; float idx = (ei.weights[tix] - lowparam) * scale; idx = astc::clamp1f(idx); @@ -500,8 +500,8 @@ static void compute_ideal_colors_and_weights_3_comp( } // Zero initialize any SIMD over-fetch - unsigned int texel_count_simd = round_up_to_simd_multiple_vla(texel_count); - for (unsigned int i = texel_count; i < texel_count_simd; i++) + size_t texel_count_simd = round_up_to_simd_multiple_vla(texel_count); + for (size_t i = texel_count; i < texel_count_simd; i++) { ei.weights[i] = 0.0f; ei.weight_error_scale[i] = 0.0f; @@ -524,9 +524,9 @@ static void compute_ideal_colors_and_weights_4_comp( ) { const float error_weight = hadd_s(blk.channel_weight) / 4.0f; - unsigned int partition_count = pi.partition_count; + size_t partition_count = pi.partition_count; - unsigned int texel_count = blk.texel_count; + size_t texel_count = blk.texel_count; promise(texel_count > 0); promise(partition_count > 0); @@ -537,7 +537,7 @@ static void compute_ideal_colors_and_weights_4_comp( bool is_constant_wes { true }; float partition0_len_sq { 0.0f }; - for (unsigned int i = 0; i < partition_count; i++) + for (size_t i = 0; i < partition_count; i++) { vfloat4 dir = pms[i].dir; if (hadd_rgb_s(dir) < 0.0f) @@ -549,10 +549,10 @@ static void compute_ideal_colors_and_weights_4_comp( float lowparam { 1e10f }; float highparam { -1e10f }; - unsigned int partition_texel_count = pi.partition_texel_count[i]; - for (unsigned int j = 0; j < partition_texel_count; j++) + size_t partition_texel_count = pi.partition_texel_count[i]; + for (size_t j = 0; j < partition_texel_count; j++) { - unsigned int tix = pi.texels_of_partition[i][j]; + size_t tix = pi.texels_of_partition[i][j]; vfloat4 point = blk.texel(tix); float param = dot_s(point - line.a, line.b); ei.weights[tix] = param; @@ -585,9 +585,9 @@ static void compute_ideal_colors_and_weights_4_comp( ei.ep.endpt0[i] = line.a + line.b * lowparam; ei.ep.endpt1[i] = line.a + line.b * highparam; - for (unsigned int j = 0; j < partition_texel_count; j++) + for (size_t j = 0; j < partition_texel_count; j++) { - unsigned int tix = pi.texels_of_partition[i][j]; + size_t tix = pi.texels_of_partition[i][j]; float idx = (ei.weights[tix] - lowparam) * scale; idx = astc::clamp1f(idx); @@ -598,8 +598,8 @@ static void compute_ideal_colors_and_weights_4_comp( } // Zero initialize any SIMD over-fetch - unsigned int texel_count_simd = round_up_to_simd_multiple_vla(texel_count); - for (unsigned int i = texel_count; i < 
texel_count_simd; i++) + size_t texel_count_simd = round_up_to_simd_multiple_vla(texel_count); + for (size_t i = texel_count; i < texel_count_simd; i++) { ei.weights[i] = 0.0f; ei.weight_error_scale[i] = 0.0f; @@ -630,7 +630,7 @@ void compute_ideal_colors_and_weights_1plane( void compute_ideal_colors_and_weights_2planes( const block_size_descriptor& bsd, const image_block& blk, - unsigned int plane2_component, + size_t plane2_component, endpoints_and_weights& ei1, endpoints_and_weights& ei2 ) { @@ -691,13 +691,13 @@ float compute_error_of_weight_set_1plane( const float* dec_weight_quant_uvalue ) { vfloatacc error_summav = vfloatacc::zero(); - unsigned int texel_count = di.texel_count; + size_t texel_count = di.texel_count; promise(texel_count > 0); // Process SIMD-width chunks, safe to over-fetch - the extra space is zero initialized if (di.max_texel_weight_count > 2) { - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { // Compute the bilinear interpolation of the decimated weight grid vfloat current_values = bilinear_infill_vla(di, dec_weight_quant_uvalue, i); @@ -713,7 +713,7 @@ float compute_error_of_weight_set_1plane( } else if (di.max_texel_weight_count > 1) { - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { // Compute the bilinear interpolation of the decimated weight grid vfloat current_values = bilinear_infill_vla_2(di, dec_weight_quant_uvalue, i); @@ -729,7 +729,7 @@ float compute_error_of_weight_set_1plane( } else { - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { // Load the weight set directly, without interpolation vfloat current_values = loada(dec_weight_quant_uvalue + i); @@ -757,13 +757,13 @@ float compute_error_of_weight_set_2planes( const float* dec_weight_quant_uvalue_plane2 ) { vfloatacc error_summav = vfloatacc::zero(); - unsigned int texel_count = di.texel_count; + size_t texel_count = di.texel_count; promise(texel_count > 0); // Process SIMD-width chunks, safe to over-fetch - the extra space is zero initialized if (di.max_texel_weight_count > 2) { - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { // Plane 1 // Compute the bilinear interpolation of the decimated weight grid @@ -788,7 +788,7 @@ float compute_error_of_weight_set_2planes( } else if (di.max_texel_weight_count > 1) { - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { // Plane 1 // Compute the bilinear interpolation of the decimated weight grid @@ -813,7 +813,7 @@ float compute_error_of_weight_set_2planes( } else { - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { // Plane 1 // Load the weight set directly, without interpolation @@ -847,8 +847,8 @@ void compute_ideal_weights_for_decimation( const decimation_info& di, float* dec_weight_ideal_value ) { - unsigned int texel_count = di.texel_count; - unsigned int weight_count = di.weight_count; + size_t texel_count = di.texel_count; + size_t weight_count = di.weight_count; bool is_direct = texel_count == weight_count; promise(texel_count > 0); promise(weight_count > 0); @@ -857,7 +857,7 @@ void compute_ideal_weights_for_decimation( // zero-initialized SIMD over-fetch 
region if (is_direct) { - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { vfloat weight(ei.weights + i); storea(weight, dec_weight_ideal_value + i); @@ -875,7 +875,7 @@ void compute_ideal_weights_for_decimation( // This overshoots - this is OK as we initialize the array tails in the // decimation table structures to safe values ... - for (unsigned int i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH) { // Start with a small value to avoid div-by-zero later vfloat weight_weight(1e-10f); @@ -883,10 +883,10 @@ void compute_ideal_weights_for_decimation( // Accumulate error weighting of all the texels using this weight vint weight_texel_count(di.weight_texel_count + i); - unsigned int max_texel_count = hmax_s(weight_texel_count); + size_t max_texel_count = hmax_s(weight_texel_count); promise(max_texel_count > 0); - for (unsigned int j = 0; j < max_texel_count; j++) + for (size_t j = 0; j < max_texel_count; j++) { const uint8_t* texel = di.weight_texels_tr[j] + i; vfloat weight = loada(di.weights_texel_contribs_tr[j] + i); @@ -910,7 +910,7 @@ void compute_ideal_weights_for_decimation( // over-process full SIMD vectors - the tail is zeroed. if (di.max_texel_weight_count <= 2) { - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { vfloat weight = bilinear_infill_vla_2(di, dec_weight_ideal_value, i); storea(weight, infilled_weights + i); @@ -918,7 +918,7 @@ void compute_ideal_weights_for_decimation( } else { - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { vfloat weight = bilinear_infill_vla(di, dec_weight_ideal_value, i); storea(weight, infilled_weights + i); @@ -930,7 +930,7 @@ void compute_ideal_weights_for_decimation( constexpr float stepsize = 0.25f; constexpr float chd_scale = -WEIGHTS_TEXEL_SUM; - for (unsigned int i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH) { vfloat weight_val = loada(dec_weight_ideal_value + i); @@ -941,10 +941,10 @@ void compute_ideal_weights_for_decimation( // Accumulate error weighting of all the texels using this weight vint weight_texel_count(di.weight_texel_count + i); - unsigned int max_texel_count = hmax_s(weight_texel_count); + size_t max_texel_count = hmax_s(weight_texel_count); promise(max_texel_count > 0); - for (unsigned int j = 0; j < max_texel_count; j++) + for (size_t j = 0; j < max_texel_count; j++) { const uint8_t* texel = di.weight_texels_tr[j] + i; vfloat contrib_weight = loada(di.weights_texel_contribs_tr[j] + i); @@ -1152,16 +1152,16 @@ void recompute_ideal_colors_1plane( vfloat4 rgbs_vectors[BLOCK_MAX_PARTITIONS], vfloat4 rgbo_vectors[BLOCK_MAX_PARTITIONS] ) { - unsigned int weight_count = di.weight_count; - unsigned int total_texel_count = blk.texel_count; - unsigned int partition_count = pi.partition_count; + size_t weight_count = di.weight_count; + size_t total_texel_count = blk.texel_count; + size_t partition_count = pi.partition_count; promise(weight_count > 0); promise(total_texel_count > 0); promise(partition_count > 0); ASTCENC_ALIGNAS float dec_weight[BLOCK_MAX_WEIGHTS]; - for (unsigned int i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH) { vint unquant_value(dec_weights_uquant + i); vfloat 
unquant_valuef = int_to_float(unquant_value) * vfloat(1.0f / 64.0f); @@ -1176,7 +1176,7 @@ void recompute_ideal_colors_1plane( } else if (di.max_texel_weight_count <= 2) { - for (unsigned int i = 0; i < total_texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < total_texel_count; i += ASTCENC_SIMD_WIDTH) { vfloat weight = bilinear_infill_vla_2(di, dec_weight, i); storea(weight, undec_weight + i); @@ -1186,7 +1186,7 @@ void recompute_ideal_colors_1plane( } else { - for (unsigned int i = 0; i < total_texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < total_texel_count; i += ASTCENC_SIMD_WIDTH) { vfloat weight = bilinear_infill_vla(di, dec_weight, i); storea(weight, undec_weight + i); @@ -1197,9 +1197,9 @@ void recompute_ideal_colors_1plane( vfloat4 rgba_sum(blk.data_mean * static_cast(blk.texel_count)); - for (unsigned int i = 0; i < partition_count; i++) + for (size_t i = 0; i < partition_count; i++) { - unsigned int texel_count = pi.partition_texel_count[i]; + size_t texel_count = pi.partition_texel_count[i]; const uint8_t *texel_indexes = pi.texels_of_partition[i]; // Only compute a partition mean if more than one partition @@ -1207,9 +1207,9 @@ void recompute_ideal_colors_1plane( { rgba_sum = vfloat4::zero(); promise(texel_count > 0); - for (unsigned int j = 0; j < texel_count; j++) + for (size_t j = 0; j < texel_count; j++) { - unsigned int tix = texel_indexes[j]; + size_t tix = texel_indexes[j]; rgba_sum += blk.texel(tix); } } @@ -1238,9 +1238,9 @@ void recompute_ideal_colors_1plane( vfloat4 color_weight = blk.channel_weight; float ls_weight = hadd_rgb_s(color_weight); - for (unsigned int j = 0; j < texel_count; j++) + for (size_t j = 0; j < texel_count; j++) { - unsigned int tix = texel_indexes[j]; + size_t tix = texel_indexes[j]; vfloat4 rgba = blk.texel(tix); float idx0 = undec_weight_ref[tix]; @@ -1377,8 +1377,8 @@ void recompute_ideal_colors_2planes( vfloat4& rgbo_vector, int plane2_component ) { - unsigned int weight_count = di.weight_count; - unsigned int total_texel_count = blk.texel_count; + size_t weight_count = di.weight_count; + size_t total_texel_count = blk.texel_count; promise(total_texel_count > 0); promise(weight_count > 0); @@ -1388,7 +1388,7 @@ void recompute_ideal_colors_2planes( assert(weight_count <= BLOCK_MAX_WEIGHTS_2PLANE); - for (unsigned int i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH) { vint unquant_value1(dec_weights_uquant_plane1 + i); vfloat unquant_value1f = int_to_float(unquant_value1) * vfloat(1.0f / 64.0f); @@ -1412,7 +1412,7 @@ void recompute_ideal_colors_2planes( } else if (di.max_texel_weight_count <= 2) { - for (unsigned int i = 0; i < total_texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < total_texel_count; i += ASTCENC_SIMD_WIDTH) { vfloat weight = bilinear_infill_vla_2(di, dec_weight_plane1, i); storea(weight, undec_weight_plane1 + i); @@ -1426,7 +1426,7 @@ void recompute_ideal_colors_2planes( } else { - for (unsigned int i = 0; i < total_texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < total_texel_count; i += ASTCENC_SIMD_WIDTH) { vfloat weight = bilinear_infill_vla(di, dec_weight_plane1, i); storea(weight, undec_weight_plane1 + i); @@ -1439,7 +1439,7 @@ void recompute_ideal_colors_2planes( undec_weight_plane2_ref = undec_weight_plane2; } - unsigned int texel_count = bsd.texel_count; + size_t texel_count = bsd.texel_count; vfloat4 rgba_weight_sum = max(blk.channel_weight * static_cast(texel_count), 1e-17f); vfloat4 scale_dir = 
normalize(blk.data_mean.swz<0, 1, 2>()); @@ -1471,7 +1471,7 @@ void recompute_ideal_colors_2planes( vfloat4 color_weight = blk.channel_weight; float ls_weight = hadd_rgb_s(color_weight); - for (unsigned int j = 0; j < texel_count; j++) + for (size_t j = 0; j < texel_count; j++) { vfloat4 rgba = blk.texel(j); diff --git a/Source/astcenc_image.cpp b/Source/astcenc_image.cpp index 079f69f1..2c73c058 100644 --- a/Source/astcenc_image.cpp +++ b/Source/astcenc_image.cpp @@ -153,14 +153,14 @@ void load_image_block( const astcenc_image& img, image_block& blk, const block_size_descriptor& bsd, - unsigned int xpos, - unsigned int ypos, - unsigned int zpos, + size_t xpos, + size_t ypos, + size_t zpos, const astcenc_swizzle& swz ) { - unsigned int xsize = img.dim_x; - unsigned int ysize = img.dim_y; - unsigned int zsize = img.dim_z; + size_t xsize = img.dim_x; + size_t ysize = img.dim_y; + size_t zsize = img.dim_z; blk.xpos = xpos; blk.ypos = ypos; @@ -208,18 +208,18 @@ void load_image_block( converter = encode_texel_lns; } - for (unsigned int z = 0; z < bsd.zdim; z++) + for (size_t z = 0; z < bsd.zdim; z++) { - unsigned int zi = astc::min(zpos + z, zsize - 1); + size_t zi = astc::min(zpos + z, zsize - 1); void* plane = img.data[zi]; - for (unsigned int y = 0; y < bsd.ydim; y++) + for (size_t y = 0; y < bsd.ydim; y++) { - unsigned int yi = astc::min(ypos + y, ysize - 1); + size_t yi = astc::min(ypos + y, ysize - 1); - for (unsigned int x = 0; x < bsd.xdim; x++) + for (size_t x = 0; x < bsd.xdim; x++) { - unsigned int xi = astc::min(xpos + x, xsize - 1); + size_t xi = astc::min(xpos + x, xsize - 1); vfloat4 datav = loader(plane, (4 * xsize * yi) + (4 * xi)); datav = swizzler(datav, swz); @@ -270,16 +270,16 @@ void load_image_block_fast_ldr( const astcenc_image& img, image_block& blk, const block_size_descriptor& bsd, - unsigned int xpos, - unsigned int ypos, - unsigned int zpos, + size_t xpos, + size_t ypos, + size_t zpos, const astcenc_swizzle& swz ) { (void)swz; (void)decode_mode; - unsigned int xsize = img.dim_x; - unsigned int ysize = img.dim_y; + size_t xsize = img.dim_x; + size_t ysize = img.dim_y; blk.xpos = xpos; blk.ypos = ypos; @@ -292,13 +292,13 @@ void load_image_block_fast_ldr( int idx = 0; const uint8_t* plane = static_cast(img.data[0]); - for (unsigned int y = ypos; y < ypos + bsd.ydim; y++) + for (size_t y = ypos; y < ypos + bsd.ydim; y++) { - unsigned int yi = astc::min(y, ysize - 1); + size_t yi = astc::min(y, ysize - 1); - for (unsigned int x = xpos; x < xpos + bsd.xdim; x++) + for (size_t x = xpos; x < xpos + bsd.xdim; x++) { - unsigned int xi = astc::min(x, xsize - 1); + size_t xi = astc::min(x, xsize - 1); vint4 datavi = vint4(plane + (4 * xsize * yi) + (4 * xi)); vfloat4 datav = int_to_float(datavi) * (65535.0f / 255.0f); @@ -336,26 +336,26 @@ void store_image_block( astcenc_image& img, const image_block& blk, const block_size_descriptor& bsd, - unsigned int xpos, - unsigned int ypos, - unsigned int zpos, + size_t xpos, + size_t ypos, + size_t zpos, const astcenc_swizzle& swz ) { - unsigned int x_size = img.dim_x; - unsigned int x_start = xpos; - unsigned int x_end = astc::min(x_size, xpos + bsd.xdim); - unsigned int x_count = x_end - x_start; - unsigned int x_nudge = bsd.xdim - x_count; - - unsigned int y_size = img.dim_y; - unsigned int y_start = ypos; - unsigned int y_end = astc::min(y_size, ypos + bsd.ydim); - unsigned int y_count = y_end - y_start; - unsigned int y_nudge = (bsd.ydim - y_count) * bsd.xdim; - - unsigned int z_size = img.dim_z; - unsigned int z_start = zpos; - 
unsigned int z_end = astc::min(z_size, zpos + bsd.zdim); + size_t x_size = img.dim_x; + size_t x_start = xpos; + size_t x_end = astc::min(x_size, xpos + bsd.xdim); + size_t x_count = x_end - x_start; + size_t x_nudge = bsd.xdim - x_count; + + size_t y_size = img.dim_y; + size_t y_start = ypos; + size_t y_end = astc::min(y_size, ypos + bsd.ydim); + size_t y_count = y_end - y_start; + size_t y_nudge = (bsd.ydim - y_count) * bsd.xdim; + + size_t z_size = img.dim_z; + size_t z_start = zpos; + size_t z_end = astc::min(z_size, zpos + bsd.zdim); // True if any non-identity swizzle bool needs_swz = (swz.r != ASTCENC_SWZ_R) || (swz.g != ASTCENC_SWZ_G) || @@ -368,19 +368,19 @@ void store_image_block( int idx = 0; if (img.data_type == ASTCENC_TYPE_U8) { - for (unsigned int z = z_start; z < z_end; z++) + for (size_t z = z_start; z < z_end; z++) { // Fetch the image plane uint8_t* data8 = static_cast(img.data[z]); - for (unsigned int y = y_start; y < y_end; y++) + for (size_t y = y_start; y < y_end; y++) { uint8_t* data8_row = data8 + (4 * x_size * y) + (4 * x_start); - for (unsigned int x = 0; x < x_count; x += ASTCENC_SIMD_WIDTH) + for (size_t x = 0; x < x_count; x += ASTCENC_SIMD_WIDTH) { - unsigned int max_texels = ASTCENC_SIMD_WIDTH; - unsigned int used_texels = astc::min(x_count - x, max_texels); + size_t max_texels = ASTCENC_SIMD_WIDTH; + size_t used_texels = astc::min(x_count - x, max_texels); // Unaligned load as rows are not always SIMD_WIDTH long vfloat data_r(blk.data_r + idx); @@ -445,16 +445,16 @@ void store_image_block( } else if (img.data_type == ASTCENC_TYPE_F16) { - for (unsigned int z = z_start; z < z_end; z++) + for (size_t z = z_start; z < z_end; z++) { // Fetch the image plane uint16_t* data16 = static_cast(img.data[z]); - for (unsigned int y = y_start; y < y_end; y++) + for (size_t y = y_start; y < y_end; y++) { uint16_t* data16_row = data16 + (4 * x_size * y) + (4 * x_start); - for (unsigned int x = 0; x < x_count; x++) + for (size_t x = 0; x < x_count; x++) { vint4 color; @@ -507,16 +507,16 @@ void store_image_block( { assert(img.data_type == ASTCENC_TYPE_F32); - for (unsigned int z = z_start; z < z_end; z++) + for (size_t z = z_start; z < z_end; z++) { // Fetch the image plane float* data32 = static_cast(img.data[z]); - for (unsigned int y = y_start; y < y_end; y++) + for (size_t y = y_start; y < y_end; y++) { float* data32_row = data32 + (4 * x_size * y) + (4 * x_start); - for (unsigned int x = 0; x < x_count; x++) + for (size_t x = 0; x < x_count; x++) { vfloat4 color = blk.texel(idx); diff --git a/Source/astcenc_integer_sequence.cpp b/Source/astcenc_integer_sequence.cpp index 41dc38b7..ab776979 100644 --- a/Source/astcenc_integer_sequence.cpp +++ b/Source/astcenc_integer_sequence.cpp @@ -416,8 +416,8 @@ static const std::array ise_sizes {{ }}; /* See header for documentation. */ -unsigned int get_ise_sequence_bitcount( - unsigned int character_count, +size_t get_ise_sequence_bitcount( + size_t character_count, quant_method quant_level ) { // Cope with out-of bounds values - input might be invalid @@ -428,7 +428,7 @@ unsigned int get_ise_sequence_bitcount( } auto& entry = ise_sizes[quant_level]; - unsigned int divisor = (entry.divisor << 1) + 1; + size_t divisor = (entry.divisor << 1) + 1; return (entry.scale * character_count + divisor - 1) / divisor; } @@ -444,12 +444,12 @@ unsigned int get_ise_sequence_bitcount( * @param[in,out] ptr The data pointer to write to. 
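The x_count/x_nudge bookkeeping in store_image_block above clips the block footprint against the image edge: only x_count of the block's xdim columns are written back, and x_nudge is how far the block-local index must skip to stay in step for the next row. A minimal standalone sketch of that arithmetic follows; the struct and function names are illustrative, not library helpers.

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdio>

// Clip a block footprint [pos, pos + block_dim) against an image axis of
// length 'size'. 'count' is how many texels are written back and 'nudge'
// is the unused block-local tail that must be skipped per row.
struct axis_clip { size_t start, count, nudge; };

static axis_clip clip_axis(size_t pos, size_t block_dim, size_t size)
{
    assert(pos < size);
    size_t end = std::min(size, pos + block_dim);
    return { pos, end - pos, block_dim - (end - pos) };
}

int main()
{
    // 12 texel wide image, 8 texel wide blocks: the second block column
    // writes 4 texels and skips 4 block-local texels per row.
    axis_clip c = clip_axis(8, 8, 12);
    std::printf("count %zu nudge %zu\n", c.count, c.nudge); // count 4 nudge 4
    return 0;
}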
*/ static inline void write_bits( - unsigned int value, - unsigned int bitcount, - unsigned int bitoffset, + size_t value, + size_t bitcount, + size_t bitoffset, uint8_t ptr[2] ) { - unsigned int mask = (1 << bitcount) - 1; + size_t mask = (1 << bitcount) - 1; value &= mask; ptr += bitoffset >> 3; bitoffset &= 7; @@ -475,15 +475,15 @@ static inline void write_bits( * * @return The read value. */ -static inline unsigned int read_bits( - unsigned int bitcount, - unsigned int bitoffset, +static inline size_t read_bits( + size_t bitcount, + size_t bitoffset, const uint8_t* ptr ) { - unsigned int mask = (1 << bitcount) - 1; + size_t mask = (1 << bitcount) - 1; ptr += bitoffset >> 3; bitoffset &= 7; - unsigned int value = ptr[0] | (ptr[1] << 8); + size_t value = ptr[0] | (ptr[1] << 8); value >>= bitoffset; value &= mask; return value; @@ -492,31 +492,31 @@ static inline unsigned int read_bits( /* See header for documentation. */ void encode_ise( quant_method quant_level, - unsigned int character_count, + size_t character_count, const uint8_t* input_data, uint8_t* output_data, - unsigned int bit_offset + size_t bit_offset ) { promise(character_count > 0); - unsigned int bits = btq_counts[quant_level].bits; - unsigned int trits = btq_counts[quant_level].trits; - unsigned int quints = btq_counts[quant_level].quints; - unsigned int mask = (1 << bits) - 1; + size_t bits = btq_counts[quant_level].bits; + size_t trits = btq_counts[quant_level].trits; + size_t quints = btq_counts[quant_level].quints; + size_t mask = (1 << bits) - 1; // Write out trits and bits if (trits) { - unsigned int i = 0; - unsigned int full_trit_blocks = character_count / 5; + size_t i = 0; + size_t full_trit_blocks = character_count / 5; - for (unsigned int j = 0; j < full_trit_blocks; j++) + for (size_t j = 0; j < full_trit_blocks; j++) { - unsigned int i4 = input_data[i + 4] >> bits; - unsigned int i3 = input_data[i + 3] >> bits; - unsigned int i2 = input_data[i + 2] >> bits; - unsigned int i1 = input_data[i + 1] >> bits; - unsigned int i0 = input_data[i + 0] >> bits; + size_t i4 = input_data[i + 4] >> bits; + size_t i3 = input_data[i + 3] >> bits; + size_t i2 = input_data[i + 2] >> bits; + size_t i1 = input_data[i + 1] >> bits; + size_t i0 = input_data[i + 0] >> bits; uint8_t T = integer_of_trits[i4][i3][i2][i1][i0]; @@ -555,15 +555,15 @@ void encode_ise( { // i4 cannot be present - we know the block is partial // i0 must be present - we know the block isn't empty - unsigned int i4 = 0; - unsigned int i3 = i + 3 >= character_count ? 0 : input_data[i + 3] >> bits; - unsigned int i2 = i + 2 >= character_count ? 0 : input_data[i + 2] >> bits; - unsigned int i1 = i + 1 >= character_count ? 0 : input_data[i + 1] >> bits; - unsigned int i0 = input_data[i + 0] >> bits; + size_t i4 = 0; + size_t i3 = i + 3 >= character_count ? 0 : input_data[i + 3] >> bits; + size_t i2 = i + 2 >= character_count ? 0 : input_data[i + 2] >> bits; + size_t i1 = i + 1 >= character_count ? 
0 : input_data[i + 1] >> bits; + size_t i0 = input_data[i + 0] >> bits; uint8_t T = integer_of_trits[i4][i3][i2][i1][i0]; - for (unsigned int j = 0; i < character_count; i++, j++) + for (size_t j = 0; i < character_count; i++, j++) { // Truncated table as this iteration is always partital static const uint8_t tbits[4] { 2, 2, 1, 2 }; @@ -580,14 +580,14 @@ void encode_ise( // Write out quints and bits else if (quints) { - unsigned int i = 0; - unsigned int full_quint_blocks = character_count / 3; + size_t i = 0; + size_t full_quint_blocks = character_count / 3; - for (unsigned int j = 0; j < full_quint_blocks; j++) + for (size_t j = 0; j < full_quint_blocks; j++) { - unsigned int i2 = input_data[i + 2] >> bits; - unsigned int i1 = input_data[i + 1] >> bits; - unsigned int i0 = input_data[i + 0] >> bits; + size_t i2 = input_data[i + 2] >> bits; + size_t i1 = input_data[i + 1] >> bits; + size_t i0 = input_data[i + 0] >> bits; uint8_t T = integer_of_quints[i2][i1][i0]; @@ -616,13 +616,13 @@ void encode_ise( { // i2 cannot be present - we know the block is partial // i0 must be present - we know the block isn't empty - unsigned int i2 = 0; - unsigned int i1 = i + 1 >= character_count ? 0 : input_data[i + 1] >> bits; - unsigned int i0 = input_data[i + 0] >> bits; + size_t i2 = 0; + size_t i1 = i + 1 >= character_count ? 0 : input_data[i + 1] >> bits; + size_t i0 = input_data[i + 0] >> bits; uint8_t T = integer_of_quints[i2][i1][i0]; - for (unsigned int j = 0; i < character_count; i++, j++) + for (size_t j = 0; i < character_count; i++, j++) { // Truncated table as this iteration is always partital static const uint8_t tbits[2] { 3, 2 }; @@ -639,7 +639,7 @@ void encode_ise( // Write out just bits else { - for (unsigned int i = 0; i < character_count; i++) + for (size_t i = 0; i < character_count; i++) { write_bits(input_data[i], bits, bit_offset, output_data); bit_offset += bits; @@ -650,10 +650,10 @@ void encode_ise( /* See header for documentation. */ void decode_ise( quant_method quant_level, - unsigned int character_count, + size_t character_count, const uint8_t* input_data, uint8_t* output_data, - unsigned int bit_offset + size_t bit_offset ) { promise(character_count > 0); @@ -663,15 +663,15 @@ void decode_ise( uint8_t results[68]; uint8_t tq_blocks[22] { 0 }; // Trit-blocks or quint-blocks, must be zeroed - unsigned int bits = btq_counts[quant_level].bits; - unsigned int trits = btq_counts[quant_level].trits; - unsigned int quints = btq_counts[quant_level].quints; + size_t bits = btq_counts[quant_level].bits; + size_t trits = btq_counts[quant_level].trits; + size_t quints = btq_counts[quant_level].quints; - unsigned int lcounter = 0; - unsigned int hcounter = 0; + size_t lcounter = 0; + size_t hcounter = 0; // Collect bits for each element, as well as bits for any trit-blocks and quint-blocks. 
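The sequence cost returned by get_ise_sequence_bitcount follows directly from the packing shown above: five trit-encoded values share one 8-bit packed trit quintuple plus five lots of raw payload bits, and three quint-encoded values share one 7-bit packed quint triple plus three lots of payload, with partial blocks charged pro-rata and rounded up over the whole sequence. A standalone sketch of that arithmetic, assuming the standard ASTC ISE costs; the helper name is illustrative.

#include <cstddef>
#include <cstdio>

// Bits needed to BISE-encode 'count' values that each carry 'bits' raw bits
// plus an optional trit (base 3) or quint (base 5) component.
static size_t ise_bitcount(size_t count, size_t bits, bool trits, bool quints)
{
    if (trits)  // 8 bits per 5 trits, rounded up across the sequence
        return count * bits + (count * 8 + 4) / 5;
    if (quints) // 7 bits per 3 quints, rounded up across the sequence
        return count * bits + (count * 7 + 2) / 3;
    return count * bits;
}

int main()
{
    // e.g. 10 weights at QUANT_6 (1 bit + trit): 10*1 + ceil(80/5) = 26 bits
    std::printf("%zu\n", ise_bitcount(10, 1, true, false));
    return 0;
}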
- for (unsigned int i = 0; i < character_count; i++) + for (size_t i = 0; i < character_count; i++) { results[i] = static_cast(read_bits(bits, bit_offset, input_data)); bit_offset += bits; @@ -682,7 +682,7 @@ void decode_ise( static const uint8_t block_shift[5] { 0, 2, 4, 5, 7 }; static const uint8_t next_lcounter[5] { 1, 2, 3, 4, 0 }; static const uint8_t hcounter_incr[5] { 0, 0, 0, 0, 1 }; - unsigned int tdata = read_bits(bits_to_read[lcounter], bit_offset, input_data); + size_t tdata = read_bits(bits_to_read[lcounter], bit_offset, input_data); bit_offset += bits_to_read[lcounter]; tq_blocks[hcounter] |= tdata << block_shift[lcounter]; hcounter += hcounter_incr[lcounter]; @@ -695,7 +695,7 @@ void decode_ise( static const uint8_t block_shift[3] { 0, 3, 5 }; static const uint8_t next_lcounter[3] { 1, 2, 0 }; static const uint8_t hcounter_incr[3] { 0, 0, 1 }; - unsigned int tdata = read_bits(bits_to_read[lcounter], bit_offset, input_data); + size_t tdata = read_bits(bits_to_read[lcounter], bit_offset, input_data); bit_offset += bits_to_read[lcounter]; tq_blocks[hcounter] |= tdata << block_shift[lcounter]; hcounter += hcounter_incr[lcounter]; @@ -706,9 +706,9 @@ void decode_ise( // Unpack trit-blocks or quint-blocks as needed if (trits) { - unsigned int trit_blocks = (character_count + 4) / 5; + size_t trit_blocks = (character_count + 4) / 5; promise(trit_blocks > 0); - for (unsigned int i = 0; i < trit_blocks; i++) + for (size_t i = 0; i < trit_blocks; i++) { const uint8_t *tritptr = trits_of_integer[tq_blocks[i]]; results[5 * i ] |= tritptr[0] << bits; @@ -721,9 +721,9 @@ void decode_ise( if (quints) { - unsigned int quint_blocks = (character_count + 2) / 3; + size_t quint_blocks = (character_count + 2) / 3; promise(quint_blocks > 0); - for (unsigned int i = 0; i < quint_blocks; i++) + for (size_t i = 0; i < quint_blocks; i++) { const uint8_t *quintptr = quints_of_integer[tq_blocks[i]]; results[3 * i ] |= quintptr[0] << bits; @@ -732,7 +732,7 @@ void decode_ise( } } - for (unsigned int i = 0; i < character_count; i++) + for (size_t i = 0; i < character_count; i++) { output_data[i] = results[i]; } diff --git a/Source/astcenc_internal.h b/Source/astcenc_internal.h index 008e1f5f..bed460ee 100644 --- a/Source/astcenc_internal.h +++ b/Source/astcenc_internal.h @@ -69,31 +69,31 @@ #endif /** @brief The maximum number of texels a block can support (6x6x6 block). */ -static constexpr unsigned int BLOCK_MAX_TEXELS { ASTCENC_BLOCK_MAX_TEXELS }; +static constexpr size_t BLOCK_MAX_TEXELS { ASTCENC_BLOCK_MAX_TEXELS }; /** @brief The maximum number of components a block can support. */ -static constexpr unsigned int BLOCK_MAX_COMPONENTS { 4 }; +static constexpr size_t BLOCK_MAX_COMPONENTS { 4 }; /** @brief The maximum number of partitions a block can support. */ -static constexpr unsigned int BLOCK_MAX_PARTITIONS { 4 }; +static constexpr size_t BLOCK_MAX_PARTITIONS { 4 }; /** @brief The number of partitionings, per partition count, suported by the ASTC format. */ -static constexpr unsigned int BLOCK_MAX_PARTITIONINGS { 1024 }; +static constexpr size_t BLOCK_MAX_PARTITIONINGS { 1024 }; /** @brief The maximum number of texels used during partition selection for texel clustering. */ static constexpr uint8_t BLOCK_MAX_KMEANS_TEXELS { 64 }; /** @brief The maximum number of weights a block can support. */ -static constexpr unsigned int BLOCK_MAX_WEIGHTS { 64 }; +static constexpr size_t BLOCK_MAX_WEIGHTS { 64 }; /** @brief The maximum number of weights a block can support per plane in 2 plane mode. 
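The two-byte write_bits/read_bits primitives used by encode_ise and decode_ise above address the stream by absolute bit offset, so a field of up to eight bits may straddle a byte boundary. A standalone round-trip sketch of the same scheme, kept outside the codec with illustrative names.

#include <cstdint>
#include <cstdio>
#include <cstring>

// Write the 'bitcount' (<= 8) low bits of 'value' at absolute bit offset
// 'bitoffset'; the field may straddle two adjacent bytes.
static void put_bits(unsigned value, unsigned bitcount, unsigned bitoffset, uint8_t* ptr)
{
    unsigned mask = (1u << bitcount) - 1;
    value &= mask;
    ptr += bitoffset >> 3;
    bitoffset &= 7;
    value <<= bitoffset;
    mask <<= bitoffset;
    ptr[0] = static_cast<uint8_t>((ptr[0] & ~mask) | (value & 0xFF));
    ptr[1] = static_cast<uint8_t>((ptr[1] & ~(mask >> 8)) | (value >> 8));
}

static unsigned get_bits(unsigned bitcount, unsigned bitoffset, const uint8_t* ptr)
{
    unsigned mask = (1u << bitcount) - 1;
    ptr += bitoffset >> 3;
    bitoffset &= 7;
    unsigned value = ptr[0] | (unsigned(ptr[1]) << 8);
    return (value >> bitoffset) & mask;
}

int main()
{
    uint8_t buf[4];
    std::memset(buf, 0, sizeof(buf));
    put_bits(0x5, 3, 6, buf);                  // straddles bytes 0 and 1
    std::printf("%u\n", get_bits(3, 6, buf));  // prints 5
    return 0;
}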
*/ -static constexpr unsigned int BLOCK_MAX_WEIGHTS_2PLANE { BLOCK_MAX_WEIGHTS / 2 }; +static constexpr size_t BLOCK_MAX_WEIGHTS_2PLANE { BLOCK_MAX_WEIGHTS / 2 }; /** @brief The minimum number of weight bits a candidate encoding must encode. */ -static constexpr unsigned int BLOCK_MIN_WEIGHT_BITS { 24 }; +static constexpr size_t BLOCK_MIN_WEIGHT_BITS { 24 }; /** @brief The maximum number of weight bits a candidate encoding can encode. */ -static constexpr unsigned int BLOCK_MAX_WEIGHT_BITS { 96 }; +static constexpr size_t BLOCK_MAX_WEIGHT_BITS { 96 }; /** @brief The index indicating a bad (unused) block mode in the remap array. */ static constexpr uint16_t BLOCK_BAD_BLOCK_MODE { 0xFFFFu }; @@ -102,19 +102,19 @@ static constexpr uint16_t BLOCK_BAD_BLOCK_MODE { 0xFFFFu }; static constexpr uint16_t BLOCK_BAD_PARTITIONING { 0xFFFFu }; /** @brief The number of partition index bits supported by the ASTC format . */ -static constexpr unsigned int PARTITION_INDEX_BITS { 10 }; +static constexpr size_t PARTITION_INDEX_BITS { 10 }; /** @brief The offset of the plane 2 weights in shared weight arrays. */ -static constexpr unsigned int WEIGHTS_PLANE2_OFFSET { BLOCK_MAX_WEIGHTS_2PLANE }; +static constexpr size_t WEIGHTS_PLANE2_OFFSET { BLOCK_MAX_WEIGHTS_2PLANE }; /** @brief The sum of quantized weights for one texel. */ static constexpr float WEIGHTS_TEXEL_SUM { 16.0f }; /** @brief The number of block modes supported by the ASTC format. */ -static constexpr unsigned int WEIGHTS_MAX_BLOCK_MODES { 2048 }; +static constexpr size_t WEIGHTS_MAX_BLOCK_MODES { 2048 }; /** @brief The number of weight grid decimation modes supported by the ASTC format. */ -static constexpr unsigned int WEIGHTS_MAX_DECIMATION_MODES { 87 }; +static constexpr size_t WEIGHTS_MAX_DECIMATION_MODES { 87 }; /** @brief The high default error used to initialize error trackers. */ static constexpr float ERROR_CALC_DEFAULT { 1e30f }; @@ -129,14 +129,14 @@ static constexpr float TUNE_MIN_SEARCH_MODE0 { 0.85f }; * * This can be dynamically reduced by the compression quality preset. */ -static constexpr unsigned int TUNE_MAX_TRIAL_CANDIDATES { 8 }; +static constexpr size_t TUNE_MAX_TRIAL_CANDIDATES { 8 }; /** * @brief The maximum number of candidate partitionings tested for each encoding mode. * * This can be dynamically reduced by the compression quality preset. */ -static constexpr unsigned int TUNE_MAX_PARTITIONING_CANDIDATES { 8 }; +static constexpr size_t TUNE_MAX_PARTITIONING_CANDIDATES { 8 }; /** * @brief The maximum quant level using full angular endpoint search method. @@ -151,7 +151,7 @@ static constexpr unsigned int TUNE_MAX_PARTITIONING_CANDIDATES { 8 }; * one 8-wide vector. Decreasing by one doesn't buy much performance, and * increasing by one is disproportionately expensive. */ -static constexpr unsigned int TUNE_MAX_ANGULAR_QUANT { 7 }; /* QUANT_12 */ +static constexpr size_t TUNE_MAX_ANGULAR_QUANT { 7 }; /* QUANT_12 */ static_assert((BLOCK_MAX_TEXELS % ASTCENC_SIMD_WIDTH) == 0, "BLOCK_MAX_TEXELS must be multiple of ASTCENC_SIMD_WIDTH"); @@ -232,7 +232,7 @@ enum quant_method * * @return The number of levels used by @c method. */ -static inline unsigned int get_quant_level(quant_method method) +static inline size_t get_quant_level(quant_method method) { switch (method) { @@ -546,35 +546,35 @@ struct block_size_descriptor * * Always modes are stored at the start of the decimation_modes list. 
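WEIGHTS_PLANE2_OFFSET above documents the layout used in two-plane mode: both planes share one fixed weight array, plane 1 in the first half and plane 2 starting at the half-way offset. A minimal sketch of that layout, assuming the 64-weight limit shown; the accessor names are illustrative.

#include <cstddef>
#include <cstdint>

// Two-plane weight storage: both planes live in one fixed array, plane 2
// starting at the half-way offset, mirroring WEIGHTS_PLANE2_OFFSET above.
constexpr size_t MAX_WEIGHTS        = 64;
constexpr size_t MAX_WEIGHTS_2PLANE = MAX_WEIGHTS / 2;
constexpr size_t PLANE2_OFFSET      = MAX_WEIGHTS_2PLANE;

struct weight_store
{
    uint8_t weights[MAX_WEIGHTS];

    uint8_t plane1(size_t i) const { return weights[i]; }
    uint8_t plane2(size_t i) const { return weights[PLANE2_OFFSET + i]; }
};

static_assert(PLANE2_OFFSET + MAX_WEIGHTS_2PLANE == MAX_WEIGHTS,
              "plane 2 weights must fit inside the shared array");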
*/ - unsigned int decimation_mode_count_always; + size_t decimation_mode_count_always; /** @brief The number of stored decimation modes for selected encodings. */ - unsigned int decimation_mode_count_selected; + size_t decimation_mode_count_selected; /** @brief The number of stored decimation modes for any encoding. */ - unsigned int decimation_mode_count_all; + size_t decimation_mode_count_all; /** * @brief The number of stored block modes which are "always" modes. * * Always modes are stored at the start of the block_modes list. */ - unsigned int block_mode_count_1plane_always; + size_t block_mode_count_1plane_always; /** @brief The number of stored block modes for active 1 plane encodings. */ - unsigned int block_mode_count_1plane_selected; + size_t block_mode_count_1plane_selected; /** @brief The number of stored block modes for active 1 and 2 plane encodings. */ - unsigned int block_mode_count_1plane_2plane_selected; + size_t block_mode_count_1plane_2plane_selected; /** @brief The number of stored block modes for any encoding. */ - unsigned int block_mode_count_all; + size_t block_mode_count_all; /** @brief The number of selected partitionings for 1/2/3/4 partitionings. */ - unsigned int partitioning_count_selected[BLOCK_MAX_PARTITIONS]; + size_t partitioning_count_selected[BLOCK_MAX_PARTITIONS]; /** @brief The number of partitionings for 1/2/3/4 partitionings. */ - unsigned int partitioning_count_all[BLOCK_MAX_PARTITIONS]; + size_t partitioning_count_all[BLOCK_MAX_PARTITIONS]; /** @brief The active decimation modes, stored in low indices. */ decimation_mode decimation_modes[WEIGHTS_MAX_DECIMATION_MODES]; @@ -633,9 +633,9 @@ struct block_size_descriptor * * @return The block mode structure. */ - const block_mode& get_block_mode(unsigned int block_mode) const + const block_mode& get_block_mode(size_t block_mode) const { - unsigned int packed_index = this->block_mode_packed_index[block_mode]; + size_t packed_index = this->block_mode_packed_index[block_mode]; assert(packed_index != BLOCK_BAD_BLOCK_MODE && packed_index < this->block_mode_count_all); return this->block_modes[packed_index]; } @@ -651,7 +651,7 @@ struct block_size_descriptor * * @return The decimation mode structure. */ - const decimation_mode& get_decimation_mode(unsigned int decimation_mode) const + const decimation_mode& get_decimation_mode(size_t decimation_mode) const { return this->decimation_modes[decimation_mode]; } @@ -667,7 +667,7 @@ struct block_size_descriptor * * @return The decimation info structure. */ - const decimation_info& get_decimation_info(unsigned int decimation_mode) const + const decimation_info& get_decimation_info(size_t decimation_mode) const { return this->decimation_tables[decimation_mode]; } @@ -679,13 +679,13 @@ struct block_size_descriptor * * @return The pointer to the table of 1024 entries (for 2/3/4 parts) or 1 entry (for 1 part). */ - const partition_info* get_partition_table(unsigned int partition_count) const + const partition_info* get_partition_table(size_t partition_count) const { if (partition_count == 1) { partition_count = 5; } - unsigned int index = (partition_count - 2) * BLOCK_MAX_PARTITIONINGS; + size_t index = (partition_count - 2) * BLOCK_MAX_PARTITIONINGS; return this->partitionings + index; } @@ -697,9 +697,9 @@ struct block_size_descriptor * * @return The partition info structure. 
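The get_block_mode() accessor above uses a sparse-to-dense remap: the full block mode space indexes a packed_index table, a sentinel (BLOCK_BAD_BLOCK_MODE) marks modes this block size never stores, and valid entries index the dense block_modes array. A small standalone model of that pattern, with illustrative names and an int payload standing in for the block_mode struct.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Sparse-to-dense remap in the style of get_block_mode() above.
constexpr uint16_t BAD_INDEX = 0xFFFFu;

struct packed_modes
{
    std::vector<uint16_t> remap;   // indexed by raw block mode, 0..2047
    std::vector<int>      packed;  // dense storage of the kept modes

    const int& get(size_t raw_mode) const
    {
        uint16_t packed_index = remap[raw_mode];
        assert(packed_index != BAD_INDEX && packed_index < packed.size());
        return packed[packed_index];
    }
};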
*/ - const partition_info& get_partition_info(unsigned int partition_count, unsigned int index) const + const partition_info& get_partition_info(size_t partition_count, size_t index) const { - unsigned int packed_index = 0; + size_t packed_index = 0; if (partition_count >= 2) { packed_index = this->partitioning_packed_index[partition_count - 2][index]; @@ -719,7 +719,7 @@ struct block_size_descriptor * * @return The partition info structure. */ - const partition_info& get_raw_partition_info(unsigned int partition_count, unsigned int packed_index) const + const partition_info& get_raw_partition_info(size_t partition_count, size_t packed_index) const { assert(packed_index != BLOCK_BAD_PARTITIONING && packed_index < this->partitioning_count_all[partition_count - 1]); auto& result = get_partition_table(partition_count)[packed_index]; @@ -786,13 +786,13 @@ struct image_block uint8_t alpha_lns[BLOCK_MAX_TEXELS]; /** @brief The X position of this block in the input or output image. */ - unsigned int xpos; + size_t xpos; /** @brief The Y position of this block in the input or output image. */ - unsigned int ypos; + size_t ypos; /** @brief The Z position of this block in the input or output image. */ - unsigned int zpos; + size_t zpos; /** * @brief Get an RGBA texel value from the data. @@ -801,7 +801,7 @@ struct image_block * * @return The texel in RGBA component ordering. */ - inline vfloat4 texel(unsigned int index) const + inline vfloat4 texel(size_t index) const { return vfloat4(data_r[index], data_g[index], @@ -816,7 +816,7 @@ struct image_block * * @return The texel in RGB0 component ordering. */ - inline vfloat4 texel3(unsigned int index) const + inline vfloat4 texel3(size_t index) const { return vfloat3(data_r[index], data_g[index], @@ -884,7 +884,7 @@ struct image_block struct endpoints { /** @brief The number of partition endpoints stored. */ - unsigned int partition_count; + size_t partition_count; /** @brief The colors for endpoint 0. */ vfloat4 endpt0[BLOCK_MAX_PARTITIONS]; @@ -1146,25 +1146,25 @@ struct pixel_region_args bool have_z; /** @brief The kernel radius for alpha processing. */ - unsigned int alpha_kernel_radius; + size_t alpha_kernel_radius; /** @brief The X dimension of the working data to process. */ - unsigned int size_x; + size_t size_x; /** @brief The Y dimension of the working data to process. */ - unsigned int size_y; + size_t size_y; /** @brief The Z dimension of the working data to process. */ - unsigned int size_z; + size_t size_z; /** @brief The X position of first src and dst data in the data set. */ - unsigned int offset_x; + size_t offset_x; /** @brief The Y position of first src and dst data in the data set. */ - unsigned int offset_y; + size_t offset_y; /** @brief The Z position of first src and dst data in the data set. */ - unsigned int offset_z; + size_t offset_z; /** @brief The working memory buffer. */ vfloat4 *work_memory; @@ -1179,22 +1179,22 @@ struct avg_args pixel_region_args arg; /** @brief The image X dimensions. */ - unsigned int img_size_x; + size_t img_size_x; /** @brief The image Y dimensions. */ - unsigned int img_size_y; + size_t img_size_y; /** @brief The image Z dimensions. */ - unsigned int img_size_z; + size_t img_size_z; /** @brief The maximum working block dimensions in X and Y dimensions. */ - unsigned int blk_size_xy; + size_t blk_size_xy; /** @brief The maximum working block dimensions in Z dimensions. */ - unsigned int blk_size_z; + size_t blk_size_z; /** @brief The working block memory size. 
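The partition table addressing shown in get_partition_table() above stores the 2/3/4 partition tables back to back, 1024 entries each, and keeps the single 1-partition entry immediately after them by rewriting the count to 5. A compile-time sketch of that index arithmetic; the function name is illustrative.

#include <cstddef>

// Index arithmetic mirroring get_partition_table() above.
constexpr size_t MAX_PARTITIONINGS = 1024;

constexpr size_t partition_table_offset(size_t partition_count)
{
    if (partition_count == 1)
    {
        partition_count = 5;
    }
    return (partition_count - 2) * MAX_PARTITIONINGS;
}

static_assert(partition_table_offset(2) == 0,    "2-partition table comes first");
static_assert(partition_table_offset(4) == 2048, "4-partition table comes third");
static_assert(partition_table_offset(1) == 3072, "1-partition entry sits last");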
*/ - unsigned int work_memory_size; + size_t work_memory_size; }; #if defined(ASTCENC_DIAGNOSTICS) @@ -1211,7 +1211,7 @@ struct astcenc_contexti astcenc_config config; /** @brief The thread count supported by this context. */ - unsigned int thread_count; + size_t thread_count; /** @brief The block size descriptor this context was created with. */ block_size_descriptor* bsd; @@ -1263,11 +1263,11 @@ struct astcenc_contexti * @param[out] bsd The descriptor to initialize. */ void init_block_size_descriptor( - unsigned int x_texels, - unsigned int y_texels, - unsigned int z_texels, + size_t x_texels, + size_t y_texels, + size_t z_texels, bool can_omit_modes, - unsigned int partition_count_cutoff, + size_t partition_count_cutoff, float mode_cutoff, block_size_descriptor& bsd); @@ -1284,7 +1284,7 @@ void init_block_size_descriptor( void init_partition_tables( block_size_descriptor& bsd, bool can_omit_partitionings, - unsigned int partition_count_cutoff); + size_t partition_count_cutoff); /** * @brief Get the percentile table for 2D block modes. @@ -1300,8 +1300,8 @@ void init_partition_tables( * @return The unpacked table. */ const float* get_2d_percentile_table( - unsigned int xdim, - unsigned int ydim); + size_t xdim, + size_t ydim); /** * @brief Query if a 2D block size is legal. @@ -1309,8 +1309,8 @@ const float* get_2d_percentile_table( * @return True if legal, false otherwise. */ bool is_legal_2d_block_size( - unsigned int xdim, - unsigned int ydim); + size_t xdim, + size_t ydim); /** * @brief Query if a 3D block size is legal. @@ -1318,9 +1318,9 @@ bool is_legal_2d_block_size( * @return True if legal, false otherwise. */ bool is_legal_3d_block_size( - unsigned int xdim, - unsigned int ydim, - unsigned int zdim); + size_t xdim, + size_t ydim, + size_t zdim); /* ============================================================================ Functionality for managing BISE quantization and unquantization. @@ -1384,10 +1384,10 @@ extern const int8_t quant_mode_table[10][128]; */ void encode_ise( quant_method quant_level, - unsigned int character_count, + size_t character_count, const uint8_t* input_data, uint8_t* output_data, - unsigned int bit_offset); + size_t bit_offset); /** * @brief Decode a packed string using BISE. @@ -1403,10 +1403,10 @@ void encode_ise( */ void decode_ise( quant_method quant_level, - unsigned int character_count, + size_t character_count, const uint8_t* input_data, uint8_t* output_data, - unsigned int bit_offset); + size_t bit_offset); /** * @brief Return the number of bits needed to encode an ISE sequence. @@ -1419,8 +1419,8 @@ void decode_ise( * * @return The number of bits needed to encode the BISE string. 
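The image_block accessors earlier in astcenc_internal.h (texel() and texel3()) reflect the structure-of-arrays storage used throughout the compressor: each channel is a contiguous float array so SIMD loops can load whole lanes, and a single RGBA value is gathered only on demand. A minimal sketch of that shape, assuming the 6x6x6 texel limit; names and the plain struct return type are illustrative.

#include <array>
#include <cstddef>

constexpr size_t MAX_TEXELS = 216; // 6x6x6 block

struct rgba { float r, g, b, a; };

// Structure-of-arrays texel storage in the spirit of image_block above.
struct soa_block
{
    std::array<float, MAX_TEXELS> data_r, data_g, data_b, data_a;

    rgba texel(size_t index) const
    {
        return { data_r[index], data_g[index], data_b[index], data_a[index] };
    }
};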
*/ -unsigned int get_ise_sequence_bitcount( - unsigned int character_count, +size_t get_ise_sequence_bitcount( + size_t character_count, quant_method quant_level); /* ============================================================================ @@ -1441,8 +1441,8 @@ unsigned int get_ise_sequence_bitcount( void compute_avgs_and_dirs_2_comp( const partition_info& pi, const image_block& blk, - unsigned int component1, - unsigned int component2, + size_t component1, + size_t component2, partition_metrics pm[BLOCK_MAX_PARTITIONS]); /** @@ -1458,7 +1458,7 @@ void compute_avgs_and_dirs_2_comp( void compute_avgs_and_dirs_3_comp( const partition_info& pi, const image_block& blk, - unsigned int omitted_component, + size_t omitted_component, partition_metrics pm[BLOCK_MAX_PARTITIONS]); /** @@ -1559,13 +1559,13 @@ void compute_error_squared_rgba( * * @return The actual number of candidates returned. */ -unsigned int find_best_partition_candidates( +size_t find_best_partition_candidates( const block_size_descriptor& bsd, const image_block& blk, - unsigned int partition_count, - unsigned int partition_search_limit, - unsigned int best_partitions[TUNE_MAX_PARTITIONING_CANDIDATES], - unsigned int requested_candidates); + size_t partition_count, + size_t partition_search_limit, + size_t best_partitions[TUNE_MAX_PARTITIONING_CANDIDATES], + size_t requested_candidates); /* ============================================================================ Functionality for managing images and image related data. @@ -1607,9 +1607,9 @@ static inline vmask4 get_u8_component_mask( * * @return The number of tasks in the processing stage. */ -unsigned int init_compute_averages( +size_t init_compute_averages( const astcenc_image& img, - unsigned int alpha_kernel_radius, + size_t alpha_kernel_radius, const astcenc_swizzle& swz, avg_args& ag); @@ -1642,9 +1642,9 @@ void load_image_block( const astcenc_image& img, image_block& blk, const block_size_descriptor& bsd, - unsigned int xpos, - unsigned int ypos, - unsigned int zpos, + size_t xpos, + size_t ypos, + size_t zpos, const astcenc_swizzle& swz); /** @@ -1667,9 +1667,9 @@ void load_image_block_fast_ldr( const astcenc_image& img, image_block& blk, const block_size_descriptor& bsd, - unsigned int xpos, - unsigned int ypos, - unsigned int zpos, + size_t xpos, + size_t ypos, + size_t zpos, const astcenc_swizzle& swz); /** @@ -1687,9 +1687,9 @@ void store_image_block( astcenc_image& img, const image_block& blk, const block_size_descriptor& bsd, - unsigned int xpos, - unsigned int ypos, - unsigned int zpos, + size_t xpos, + size_t ypos, + size_t zpos, const astcenc_swizzle& swz); /* ============================================================================ @@ -1728,7 +1728,7 @@ void compute_ideal_colors_and_weights_1plane( void compute_ideal_colors_and_weights_2planes( const block_size_descriptor& bsd, const image_block& blk, - unsigned int plane2_component, + size_t plane2_component, endpoints_and_weights& ei1, endpoints_and_weights& ei2); @@ -1935,15 +1935,15 @@ void unpack_weights( * * @return The actual number of candidate matches returned. 
*/ -unsigned int compute_ideal_endpoint_formats( +size_t compute_ideal_endpoint_formats( const partition_info& pi, const image_block& blk, const endpoints& ep, const int8_t* qwt_bitcounts, const float* qwt_errors, - unsigned int tune_candidate_limit, - unsigned int start_block_mode, - unsigned int end_block_mode, + size_t tune_candidate_limit, + size_t start_block_mode, + size_t end_block_mode, uint8_t partition_format_specifiers[TUNE_MAX_TRIAL_CANDIDATES][BLOCK_MAX_PARTITIONS], int block_mode[TUNE_MAX_TRIAL_CANDIDATES], quant_method quant_level[TUNE_MAX_TRIAL_CANDIDATES], @@ -2018,7 +2018,7 @@ void compute_angular_endpoints_1plane( bool only_always, const block_size_descriptor& bsd, const float* dec_weight_ideal_value, - unsigned int max_weight_quant, + size_t max_weight_quant, compression_working_buffers& tmpbuf); /** @@ -2032,7 +2032,7 @@ void compute_angular_endpoints_1plane( void compute_angular_endpoints_2planes( const block_size_descriptor& bsd, const float* dec_weight_ideal_value, - unsigned int max_weight_quant, + size_t max_weight_quant, compression_working_buffers& tmpbuf); /* ============================================================================ diff --git a/Source/astcenc_internal_entry.h b/Source/astcenc_internal_entry.h index 966c1d31..32ab8e5b 100644 --- a/Source/astcenc_internal_entry.h +++ b/Source/astcenc_internal_entry.h @@ -113,13 +113,13 @@ class ParallelManager std::condition_variable m_complete; /** @brief Number of tasks started, but not necessarily finished. */ - std::atomic m_start_count; + std::atomic m_start_count; /** @brief Number of tasks finished. */ - unsigned int m_done_count; + size_t m_done_count; /** @brief Number of tasks that need to be processed. */ - unsigned int m_task_count; + size_t m_task_count; /** @brief Progress callback (optional). */ astcenc_progress_callback m_callback; @@ -178,7 +178,7 @@ class ParallelManager * @param init_func Callable which executes the stage initialization. It must return the * total number of tasks in the stage. */ - void init(std::function init_func) + void init(std::function init_func) { std::lock_guard lck(m_lock); if (!m_init_done) @@ -197,7 +197,7 @@ class ParallelManager * @param task_count Total number of tasks needing processing. * @param callback Function pointer for progress status callbacks. */ - void init(unsigned int task_count, astcenc_progress_callback callback) + void init(size_t task_count, astcenc_progress_callback callback) { std::lock_guard lck(m_lock); if (!m_init_done) @@ -222,9 +222,9 @@ class ParallelManager * * @return Task index of the first assigned task; assigned tasks increment from this. */ - unsigned int get_task_assignment(unsigned int granule, unsigned int& count) + size_t get_task_assignment(size_t granule, size_t& count) { - unsigned int base = m_start_count.fetch_add(granule, std::memory_order_relaxed); + size_t base = m_start_count.fetch_add(granule, std::memory_order_relaxed); if (m_is_cancelled || base >= m_task_count) { count = 0; @@ -243,11 +243,11 @@ class ParallelManager * * @param count The number of completed tasks. 
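ParallelManager::get_task_assignment() above hands out work by atomically advancing a shared start counter: each worker claims a granule of task indices with fetch_add, and a claim past the end yields a zero count so the worker can stop. A simplified standalone sketch of that idea; cancellation, progress callbacks, and completion tracking are omitted, and the class name is illustrative.

#include <algorithm>
#include <atomic>
#include <cstddef>

class task_splitter
{
public:
    explicit task_splitter(size_t task_count)
        : m_task_count(task_count), m_start_count(0) { }

    // Returns the first claimed task index; 'count' is how many were claimed.
    size_t claim(size_t granule, size_t& count)
    {
        size_t base = m_start_count.fetch_add(granule, std::memory_order_relaxed);
        if (base >= m_task_count)
        {
            count = 0;
            return 0;
        }
        count = std::min(granule, m_task_count - base);
        return base;
    }

private:
    size_t m_task_count;
    std::atomic<size_t> m_start_count;
};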
*/ - void complete_task_assignment(unsigned int count) + void complete_task_assignment(size_t count) { // Note: m_done_count cannot use an atomic without the mutex; this has a race between the // update here and the wait() for other threads - unsigned int local_count; + size_t local_count; float local_last_value; { std::unique_lock lck(m_lock); diff --git a/Source/astcenc_mathlib.h b/Source/astcenc_mathlib.h index 1d73bf1d..e0d9e848 100644 --- a/Source/astcenc_mathlib.h +++ b/Source/astcenc_mathlib.h @@ -131,6 +131,9 @@ typedef union float f; } if32; +// size_t integer literal +constexpr size_t operator "" _z ( unsigned long long n ) { return n; } + // These are namespaced to avoid colliding with C standard library functions. namespace astc { diff --git a/Source/astcenc_partition_tables.cpp b/Source/astcenc_partition_tables.cpp index cad42384..b349fe39 100644 --- a/Source/astcenc_partition_tables.cpp +++ b/Source/astcenc_partition_tables.cpp @@ -36,12 +36,12 @@ * @param[out] bit_pattern The output bit pattern representation. */ static void generate_canonical_partitioning( - unsigned int texel_count, + size_t texel_count, const uint8_t* partition_of_texel, uint64_t bit_pattern[BIT_PATTERN_WORDS] ) { // Clear the pattern - for (unsigned int i = 0; i < BIT_PATTERN_WORDS; i++) + for (size_t i = 0; i < BIT_PATTERN_WORDS; i++) { bit_pattern[i] = 0; } @@ -52,12 +52,12 @@ static void generate_canonical_partitioning( int mapped_index[BLOCK_MAX_PARTITIONS]; int map_weight_count = 0; - for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++) + for (size_t i = 0; i < BLOCK_MAX_PARTITIONS; i++) { mapped_index[i] = -1; } - for (unsigned int i = 0; i < texel_count; i++) + for (size_t i = 0; i < texel_count; i++) { int index = partition_of_texel[i]; if (mapped_index[index] < 0) @@ -275,9 +275,9 @@ static uint8_t select_partition( */ static bool generate_one_partition_info_entry( block_size_descriptor& bsd, - unsigned int partition_count, - unsigned int partition_index, - unsigned int partition_remap_index, + size_t partition_count, + size_t partition_index, + size_t partition_remap_index, partition_info& pi ) { int texels_per_block = bsd.texel_count; @@ -288,11 +288,11 @@ static bool generate_one_partition_info_entry( // Assign texels to partitions int texel_idx = 0; int counts[BLOCK_MAX_PARTITIONS] { 0 }; - for (unsigned int z = 0; z < bsd.zdim; z++) + for (size_t z = 0; z < bsd.zdim; z++) { - for (unsigned int y = 0; y < bsd.ydim; y++) + for (size_t y = 0; y < bsd.ydim; y++) { - for (unsigned int x = 0; x < bsd.xdim; x++) + for (size_t x = 0; x < bsd.xdim; x++) { uint8_t part = select_partition(partition_index, x, y, z, partition_count, small_block); pi.texels_of_partition[part][counts[part]++] = static_cast(texel_idx++); @@ -302,7 +302,7 @@ static bool generate_one_partition_info_entry( } // Fill loop tail so we can overfetch later - for (unsigned int i = 0; i < partition_count; i++) + for (size_t i = 0; i < partition_count; i++) { int ptex_count = counts[i]; int ptex_count_simd = round_up_to_simd_multiple_vla(ptex_count); @@ -352,7 +352,7 @@ static bool generate_one_partition_info_entry( bitmaps = bsd.coverage_bitmaps_4[partition_remap_index]; } - for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++) + for (size_t i = 0; i < BLOCK_MAX_PARTITIONS; i++) { pi.partition_texel_count[i] = static_cast(counts[i]); } @@ -363,15 +363,15 @@ static bool generate_one_partition_info_entry( if (bitmaps) { // Populate the partition coverage bitmap - for (unsigned int i = 0; i < partition_count; i++) + for (size_t i 
= 0; i < partition_count; i++) { bitmaps[i] = 0ULL; } - unsigned int texels_to_process = astc::min(bsd.texel_count, BLOCK_MAX_KMEANS_TEXELS); - for (unsigned int i = 0; i < texels_to_process; i++) + size_t texels_to_process = astc::min(bsd.texel_count, BLOCK_MAX_KMEANS_TEXELS); + for (size_t i = 0; i < texels_to_process; i++) { - unsigned int idx = bsd.kmeans_texels[i]; + size_t idx = bsd.kmeans_texels[i]; bitmaps[pi.partition_of_texel[idx]] |= 1ULL << i; } } @@ -382,12 +382,12 @@ static bool generate_one_partition_info_entry( static void build_partition_table_for_one_partition_count( block_size_descriptor& bsd, bool can_omit_partitionings, - unsigned int partition_count_cutoff, - unsigned int partition_count, + size_t partition_count_cutoff, + size_t partition_count, partition_info* ptab, uint64_t* canonical_patterns ) { - unsigned int next_index = 0; + size_t next_index = 0; bsd.partitioning_count_selected[partition_count - 1] = 0; bsd.partitioning_count_all[partition_count - 1] = 0; @@ -400,13 +400,13 @@ static void build_partition_table_for_one_partition_count( // Iterate through twice // - Pass 0: Keep selected partitionings // - Pass 1: Keep non-selected partitionings (skip if in omit mode) - unsigned int max_iter = can_omit_partitionings ? 1 : 2; + size_t max_iter = can_omit_partitionings ? 1 : 2; // Tracker for things we built in the first iteration uint8_t build[BLOCK_MAX_PARTITIONINGS] { 0 }; - for (unsigned int x = 0; x < max_iter; x++) + for (size_t x = 0; x < max_iter; x++) { - for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONINGS; i++) + for (size_t i = 0; i < BLOCK_MAX_PARTITIONINGS; i++) { // Don't include things we built in the first pass if ((x == 1) && build[i]) @@ -422,7 +422,7 @@ static void build_partition_table_for_one_partition_count( generate_canonical_partitioning(bsd.texel_count, ptab[next_index].partition_of_texel, canonical_patterns + next_index * BIT_PATTERN_WORDS); bool keep_canonical = true; - for (unsigned int j = 0; j < next_index; j++) + for (size_t j = 0; j < next_index; j++) { bool match = compare_canonical_partitionings(canonical_patterns + next_index * BIT_PATTERN_WORDS, canonical_patterns + j * BIT_PATTERN_WORDS); if (match) @@ -460,7 +460,7 @@ static void build_partition_table_for_one_partition_count( void init_partition_tables( block_size_descriptor& bsd, bool can_omit_partitionings, - unsigned int partition_count_cutoff + size_t partition_count_cutoff ) { partition_info* par_tab2 = bsd.partitionings; partition_info* par_tab3 = par_tab2 + BLOCK_MAX_PARTITIONINGS; diff --git a/Source/astcenc_percentile_tables.cpp b/Source/astcenc_percentile_tables.cpp index 448ddcc9..952e91ed 100644 --- a/Source/astcenc_percentile_tables.cpp +++ b/Source/astcenc_percentile_tables.cpp @@ -1163,14 +1163,14 @@ static const packed_percentile_table *get_packed_table( /* See header for documentation. 
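The deduplication above relies on canonical partitionings: generate_canonical_partitioning() renumbers partition labels in order of first appearance, so two partitionings that differ only by a permutation of the labels compare equal and the duplicate is dropped. A sketch of just the relabeling step, assuming labels in the range 0..3 (BLOCK_MAX_PARTITIONS); the real code additionally packs the result into 2-bit-per-texel bit patterns for fast comparison.

#include <array>
#include <cstddef>
#include <cstdint>

// Relabel partition indices in order of first appearance.
template <size_t N>
std::array<uint8_t, N> canonicalize(const std::array<uint8_t, N>& labels)
{
    std::array<int, 4> remap { -1, -1, -1, -1 };
    int next_label = 0;

    std::array<uint8_t, N> out {};
    for (size_t i = 0; i < N; i++)
    {
        int& slot = remap[labels[i]];
        if (slot < 0)
        {
            slot = next_label++;
        }
        out[i] = static_cast<uint8_t>(slot);
    }
    return out;
}

// canonicalize({1,1,0,2}) and canonicalize({0,0,2,1}) both yield {0,0,1,2}.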
*/ const float *get_2d_percentile_table( - unsigned int xdim, - unsigned int ydim + size_t xdim, + size_t ydim ) { float* unpacked_table = new float[WEIGHTS_MAX_BLOCK_MODES]; const packed_percentile_table *apt = get_packed_table(xdim, ydim); // Set the default percentile - for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++) + for (size_t i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++) { unpacked_table[i] = 1.0f; } @@ -1178,16 +1178,16 @@ const float *get_2d_percentile_table( // Populate the unpacked percentile values for (int i = 0; i < 2; i++) { - unsigned int itemcount = apt->item_count[i]; - unsigned int difscale = apt->difscales[i]; - unsigned int accum = apt->initial_percs[i]; + size_t itemcount = apt->item_count[i]; + size_t difscale = apt->difscales[i]; + size_t accum = apt->initial_percs[i]; const uint16_t *item_ptr = apt->items[i]; - for (unsigned int j = 0; j < itemcount; j++) + for (size_t j = 0; j < itemcount; j++) { uint16_t item = item_ptr[j]; - unsigned int idx = item & 0x7FF; - unsigned int weight = (item >> 11) & 0x1F; + size_t idx = item & 0x7FF; + size_t weight = (item >> 11) & 0x1F; accum += weight; unpacked_table[idx] = static_cast(accum) / static_cast(difscale); } @@ -1199,10 +1199,10 @@ const float *get_2d_percentile_table( /* See header for documentation. */ bool is_legal_2d_block_size( - unsigned int xdim, - unsigned int ydim + size_t xdim, + size_t ydim ) { - unsigned int idx = (xdim << 8) | ydim; + size_t idx = (xdim << 8) | ydim; switch (idx) { case 0x0404: @@ -1227,11 +1227,11 @@ bool is_legal_2d_block_size( /* See header for documentation. */ bool is_legal_3d_block_size( - unsigned int xdim, - unsigned int ydim, - unsigned int zdim + size_t xdim, + size_t ydim, + size_t zdim ) { - unsigned int idx = (xdim << 16) | (ydim << 8) | zdim; + size_t idx = (xdim << 16) | (ydim << 8) | zdim; switch (idx) { case 0x030303: diff --git a/Source/astcenc_pick_best_endpoint_format.cpp b/Source/astcenc_pick_best_endpoint_format.cpp index bf872a92..ae172be3 100644 --- a/Source/astcenc_pick_best_endpoint_format.cpp +++ b/Source/astcenc_pick_best_endpoint_format.cpp @@ -85,7 +85,7 @@ static void compute_error_squared_rgb_single_partition( ) { vfloat4 ews = blk.channel_weight; - unsigned int texel_count = pi.partition_texel_count[partition_index]; + size_t texel_count = pi.partition_texel_count[partition_index]; const uint8_t* texel_indexes = pi.texels_of_partition[partition_index]; promise(texel_count > 0); @@ -121,7 +121,7 @@ static void compute_error_squared_rgb_single_partition( vfloat l_bs2(l_pline.bs.lane<2>()); vint lane_ids = vint::lane_id(); - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { const uint8_t* tix = texel_indexes + i; @@ -1087,16 +1087,16 @@ static float four_partitions_find_best_combination_for_bitcount( } /* See header for documentation. 
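is_legal_2d_block_size() above packs the two dimensions into a single switch key so the fourteen legal ASTC 2D footprints can be matched in one jump table. A self-contained sketch of the same test, using the standard ASTC 2D block sizes; the function name is illustrative.

#include <cstddef>

constexpr bool legal_2d_block(size_t xdim, size_t ydim)
{
    switch ((xdim << 8) | ydim)
    {
    case 0x0404: case 0x0504: case 0x0505: case 0x0605: case 0x0606:
    case 0x0805: case 0x0806: case 0x0808: case 0x0A05: case 0x0A06:
    case 0x0A08: case 0x0A0A: case 0x0C0A: case 0x0C0C:
        return true;   // 4x4 .. 12x12, the 14 legal 2D footprints
    default:
        return false;
    }
}

static_assert(legal_2d_block(8, 6),  "8x6 is a legal ASTC 2D block");
static_assert(!legal_2d_block(7, 7), "7x7 is not");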
*/ -unsigned int compute_ideal_endpoint_formats( +size_t compute_ideal_endpoint_formats( const partition_info& pi, const image_block& blk, const endpoints& ep, // bitcounts and errors computed for the various quantization methods const int8_t* qwt_bitcounts, const float* qwt_errors, - unsigned int tune_candidate_limit, - unsigned int start_block_mode, - unsigned int end_block_mode, + size_t tune_candidate_limit, + size_t start_block_mode, + size_t end_block_mode, // output data uint8_t partition_format_specifiers[TUNE_MAX_TRIAL_CANDIDATES][BLOCK_MAX_PARTITIONS], int block_mode[TUNE_MAX_TRIAL_CANDIDATES], @@ -1135,13 +1135,13 @@ unsigned int compute_ideal_endpoint_formats( vfloat clear_error(ERROR_CALC_DEFAULT); vint clear_quant(0); - unsigned int packed_start_block_mode = round_down_to_simd_multiple_vla(start_block_mode); + size_t packed_start_block_mode = round_down_to_simd_multiple_vla(start_block_mode); storea(clear_error, errors_of_best_combination + packed_start_block_mode); store_nbytes(clear_quant, best_quant_levels + packed_start_block_mode); store_nbytes(clear_quant, best_quant_levels_mod + packed_start_block_mode); // Ensure that last iteration overstep contains data that will never be picked - unsigned int packed_end_block_mode = round_down_to_simd_multiple_vla(end_block_mode - 1); + size_t packed_end_block_mode = round_down_to_simd_multiple_vla(end_block_mode - 1); storea(clear_error, errors_of_best_combination + packed_end_block_mode); store_nbytes(clear_quant, best_quant_levels + packed_end_block_mode); store_nbytes(clear_quant, best_quant_levels_mod + packed_end_block_mode); @@ -1153,7 +1153,7 @@ unsigned int compute_ideal_endpoint_formats( // The block contains 1 partition if (partition_count == 1) { - for (unsigned int i = start_block_mode; i < end_block_mode; i++) + for (size_t i = start_block_mode; i < end_block_mode; i++) { if (qwt_errors[i] >= ERROR_CALC_DEFAULT) { @@ -1186,7 +1186,7 @@ unsigned int compute_ideal_endpoint_formats( best_error, format_of_choice, combined_best_error, formats_of_choice); assert(start_block_mode == 0); - for (unsigned int i = 0; i < end_block_mode; i++) + for (size_t i = 0; i < end_block_mode; i++) { if (qwt_errors[i] >= ERROR_CALC_DEFAULT) { @@ -1219,7 +1219,7 @@ unsigned int compute_ideal_endpoint_formats( best_error, format_of_choice, combined_best_error, formats_of_choice); assert(start_block_mode == 0); - for (unsigned int i = 0; i < end_block_mode; i++) + for (size_t i = 0; i < end_block_mode; i++) { if (qwt_errors[i] >= ERROR_CALC_DEFAULT) { @@ -1253,7 +1253,7 @@ unsigned int compute_ideal_endpoint_formats( best_error, format_of_choice, combined_best_error, formats_of_choice); assert(start_block_mode == 0); - for (unsigned int i = 0; i < end_block_mode; i++) + for (size_t i = 0; i < end_block_mode; i++) { if (qwt_errors[i] >= ERROR_CALC_DEFAULT) { @@ -1287,14 +1287,14 @@ unsigned int compute_ideal_endpoint_formats( } // Search the remaining results and pick the best candidate modes for trial 1+ - for (unsigned int i = 1; i < tune_candidate_limit; i++) + for (size_t i = 1; i < tune_candidate_limit; i++) { vint vbest_error_index(-1); vfloat vbest_ep_error(ERROR_CALC_DEFAULT); start_block_mode = round_down_to_simd_multiple_vla(start_block_mode); vint lane_ids = vint::lane_id() + vint(start_block_mode); - for (unsigned int j = start_block_mode; j < end_block_mode; j += ASTCENC_SIMD_WIDTH) + for (size_t j = start_block_mode; j < end_block_mode; j += ASTCENC_SIMD_WIDTH) { vfloat err = vfloat(errors_of_best_combination + j); vmask mask = 
err < vbest_ep_error; @@ -1323,7 +1323,7 @@ unsigned int compute_ideal_endpoint_formats( } } - for (unsigned int i = 0; i < tune_candidate_limit; i++) + for (size_t i = 0; i < tune_candidate_limit; i++) { if (best_error_weights[i] < 0) { diff --git a/Source/astcenc_symbolic_physical.cpp b/Source/astcenc_symbolic_physical.cpp index 45d9abb6..95d8a59d 100644 --- a/Source/astcenc_symbolic_physical.cpp +++ b/Source/astcenc_symbolic_physical.cpp @@ -111,12 +111,12 @@ void symbolic_to_physical( { // There is currently no attempt to coalesce larger void-extents static const uint8_t cbytes[8] { 0xFC, 0xFD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; - for (unsigned int i = 0; i < 8; i++) + for (size_t i = 0; i < 8; i++) { pcb[i] = cbytes[i]; } - for (unsigned int i = 0; i < BLOCK_MAX_COMPONENTS; i++) + for (size_t i = 0; i < BLOCK_MAX_COMPONENTS; i++) { pcb[2 * i + 8] = scb.constant_color[i] & 0xFF; pcb[2 * i + 9] = (scb.constant_color[i] >> 8) & 0xFF; @@ -130,12 +130,12 @@ void symbolic_to_physical( { // There is currently no attempt to coalesce larger void-extents static const uint8_t cbytes[8] { 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; - for (unsigned int i = 0; i < 8; i++) + for (size_t i = 0; i < 8; i++) { pcb[i] = cbytes[i]; } - for (unsigned int i = 0; i < BLOCK_MAX_COMPONENTS; i++) + for (size_t i = 0; i < BLOCK_MAX_COMPONENTS; i++) { pcb[2 * i + 8] = scb.constant_color[i] & 0xFF; pcb[2 * i + 9] = (scb.constant_color[i] >> 8) & 0xFF; @@ -144,7 +144,7 @@ void symbolic_to_physical( return; } - unsigned int partition_count = scb.partition_count; + size_t partition_count = scb.partition_count; // Compress the weights. // They are encoded as an ordinary integer-sequence, then bit-reversed @@ -217,7 +217,7 @@ void symbolic_to_physical( // Check endpoint types for each partition to determine the lowest class present int low_class = 4; - for (unsigned int i = 0; i < partition_count; i++) + for (size_t i = 0; i < partition_count; i++) { int class_of_format = scb.color_formats[i] >> 2; low_class = astc::min(class_of_format, low_class); @@ -231,14 +231,14 @@ void symbolic_to_physical( int encoded_type = low_class + 1; int bitpos = 2; - for (unsigned int i = 0; i < partition_count; i++) + for (size_t i = 0; i < partition_count; i++) { int classbit_of_format = (scb.color_formats[i] >> 2) - low_class; encoded_type |= classbit_of_format << bitpos; bitpos++; } - for (unsigned int i = 0; i < partition_count; i++) + for (size_t i = 0; i < partition_count; i++) { int lowbits_of_format = scb.color_formats[i] & 3; encoded_type |= lowbits_of_format << bitpos; @@ -270,7 +270,7 @@ void symbolic_to_physical( int valuecount_to_encode = 0; const uint8_t* pack_table = color_uquant_to_scrambled_pquant_tables[scb.quant_mode - QUANT_6]; - for (unsigned int i = 0; i < scb.partition_count; i++) + for (size_t i = 0; i < scb.partition_count; i++) { int vals = 2 * (scb.color_formats[i] >> 2) + 2; assert(vals <= 8); @@ -369,7 +369,7 @@ void physical_to_symbolic( return; } - unsigned int packed_index = bsd.block_mode_packed_index[block_mode]; + size_t packed_index = bsd.block_mode_packed_index[block_mode]; if (packed_index == BLOCK_BAD_BLOCK_MODE) { scb.block_type = SYM_BTYPE_ERROR; diff --git a/Source/astcenc_vecmathlib.h b/Source/astcenc_vecmathlib.h index e6ae97cc..7bb49c0c 100644 --- a/Source/astcenc_vecmathlib.h +++ b/Source/astcenc_vecmathlib.h @@ -218,9 +218,9 @@ template T gatherf_byte_inds(const float* base, const uint8_t* indic * * @return The rounded value. 
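The alignment helpers converted in astcenc_vecmathlib.h just below reduce to two idioms: round down by masking off the low bits of a power-of-two lane count, and round up via a ceiling division. A standalone sketch with an assumed width of 8 lanes; in the library the width is the compile-time ASTCENC_SIMD_WIDTH.

#include <cstddef>

constexpr size_t SIMD_WIDTH = 8;

constexpr size_t round_down_to_simd(size_t count)
{
    return count & ~(SIMD_WIDTH - 1);   // clear the low bits
}

constexpr size_t round_up_to_simd(size_t count)
{
    return ((count + SIMD_WIDTH - 1) / SIMD_WIDTH) * SIMD_WIDTH;
}

static_assert(round_down_to_simd(29) == 24, "29 rounds down to 24");
static_assert(round_up_to_simd(29) == 32,   "29 rounds up to 32");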
*/ -ASTCENC_SIMD_INLINE unsigned int round_down_to_simd_multiple_8(unsigned int count) +ASTCENC_SIMD_INLINE size_t round_down_to_simd_multiple_8(size_t count) { - return count & static_cast(~(8 - 1)); + return count & static_cast(~(8 - 1)); } /** @@ -230,9 +230,9 @@ ASTCENC_SIMD_INLINE unsigned int round_down_to_simd_multiple_8(unsigned int coun * * @return The rounded value. */ -ASTCENC_SIMD_INLINE unsigned int round_down_to_simd_multiple_4(unsigned int count) +ASTCENC_SIMD_INLINE size_t round_down_to_simd_multiple_4(size_t count) { - return count & static_cast(~(4 - 1)); + return count & static_cast(~(4 - 1)); } /** @@ -244,9 +244,9 @@ ASTCENC_SIMD_INLINE unsigned int round_down_to_simd_multiple_4(unsigned int coun * * @return The rounded value. */ -ASTCENC_SIMD_INLINE unsigned int round_down_to_simd_multiple_vla(unsigned int count) +ASTCENC_SIMD_INLINE size_t round_down_to_simd_multiple_vla(size_t count) { - return count & static_cast(~(ASTCENC_SIMD_WIDTH - 1)); + return count & static_cast(~(ASTCENC_SIMD_WIDTH - 1)); } /** @@ -258,9 +258,9 @@ ASTCENC_SIMD_INLINE unsigned int round_down_to_simd_multiple_vla(unsigned int co * * @return The rounded value. */ -ASTCENC_SIMD_INLINE unsigned int round_up_to_simd_multiple_vla(unsigned int count) +ASTCENC_SIMD_INLINE size_t round_up_to_simd_multiple_vla(size_t count) { - unsigned int multiples = (count + ASTCENC_SIMD_WIDTH - 1) / ASTCENC_SIMD_WIDTH; + size_t multiples = (count + ASTCENC_SIMD_WIDTH - 1) / ASTCENC_SIMD_WIDTH; return multiples * ASTCENC_SIMD_WIDTH; } diff --git a/Source/astcenc_vecmathlib_avx2_8.h b/Source/astcenc_vecmathlib_avx2_8.h index 4c96c7c5..9e4807f0 100644 --- a/Source/astcenc_vecmathlib_avx2_8.h +++ b/Source/astcenc_vecmathlib_avx2_8.h @@ -292,9 +292,9 @@ ASTCENC_SIMD_INLINE vmask8 operator~(vmask8 a) * * bit0 = lane 0 */ -ASTCENC_SIMD_INLINE unsigned int mask(vmask8 a) +ASTCENC_SIMD_INLINE size_t mask(vmask8 a) { - return static_cast(_mm256_movemask_ps(a.m)); + return static_cast(_mm256_movemask_ps(a.m)); } /** diff --git a/Source/astcenc_vecmathlib_neon_4.h b/Source/astcenc_vecmathlib_neon_4.h index f31063d9..6cd76dc6 100644 --- a/Source/astcenc_vecmathlib_neon_4.h +++ b/Source/astcenc_vecmathlib_neon_4.h @@ -407,7 +407,7 @@ ASTCENC_SIMD_INLINE vmask4 operator~(vmask4 a) * * bit0 = lane 0 */ -ASTCENC_SIMD_INLINE unsigned int mask(vmask4 a) +ASTCENC_SIMD_INLINE size_t mask(vmask4 a) { static const int shifta[4] { 0, 1, 2, 3 }; static const int32x4_t shift = vld1q_s32(shifta); diff --git a/Source/astcenc_vecmathlib_none_4.h b/Source/astcenc_vecmathlib_none_4.h index f25c92be..977f5ac6 100644 --- a/Source/astcenc_vecmathlib_none_4.h +++ b/Source/astcenc_vecmathlib_none_4.h @@ -410,7 +410,7 @@ ASTCENC_SIMD_INLINE vmask4 operator~(vmask4 a) * * bit0 = lane 0 */ -ASTCENC_SIMD_INLINE unsigned int mask(vmask4 a) +ASTCENC_SIMD_INLINE size_t mask(vmask4 a) { return (a.m[0] & 0x1) | (a.m[1] & 0x2) | diff --git a/Source/astcenc_vecmathlib_sse_4.h b/Source/astcenc_vecmathlib_sse_4.h index f6991e46..db1e4d21 100644 --- a/Source/astcenc_vecmathlib_sse_4.h +++ b/Source/astcenc_vecmathlib_sse_4.h @@ -423,9 +423,9 @@ ASTCENC_SIMD_INLINE vmask4 operator~(vmask4 a) * * bit0 = lane 0 */ -ASTCENC_SIMD_INLINE unsigned int mask(vmask4 a) +ASTCENC_SIMD_INLINE size_t mask(vmask4 a) { - return static_cast(_mm_movemask_ps(a.m)); + return static_cast(_mm_movemask_ps(a.m)); } /** diff --git a/Source/astcenc_vecmathlib_sve_8.h b/Source/astcenc_vecmathlib_sve_8.h index 1e98df02..e4e8a0b4 100644 --- a/Source/astcenc_vecmathlib_sve_8.h +++ 
b/Source/astcenc_vecmathlib_sve_8.h @@ -287,12 +287,12 @@ ASTCENC_SIMD_INLINE vmask8 operator~(vmask8 a) * * bit0 = lane 0 */ -ASTCENC_SIMD_INLINE unsigned int mask(vmask8 a) +ASTCENC_SIMD_INLINE size_t mask(vmask8 a) { alignas(32) const int shifta[8] { 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 }; svint32_8_t template_vals = svld1_s32(svptrue_b32(), shifta); svint32_8_t active_vals = svsel_s32(a.m, template_vals, svdup_s32(0)); - return static_cast(svaddv_s32(svptrue_b32(), active_vals)); + return static_cast(svaddv_s32(svptrue_b32(), active_vals)); } /** diff --git a/Source/astcenc_weight_align.cpp b/Source/astcenc_weight_align.cpp index b2054164..660d5c05 100644 --- a/Source/astcenc_weight_align.cpp +++ b/Source/astcenc_weight_align.cpp @@ -45,7 +45,7 @@ #include #include -static constexpr unsigned int ANGULAR_STEPS { 32 }; +static constexpr size_t ANGULAR_STEPS { 32 }; static_assert((ANGULAR_STEPS % ASTCENC_SIMD_WIDTH) == 0, "ANGULAR_STEPS must be multiple of ASTCENC_SIMD_WIDTH"); @@ -55,7 +55,7 @@ static_assert(ANGULAR_STEPS >= 32, // Store a reduced sin/cos table for 64 possible weight values; this causes // slight quality loss compared to using sin() and cos() directly. Must be 2^N. -static constexpr unsigned int SINCOS_STEPS { 64 }; +static constexpr size_t SINCOS_STEPS { 64 }; static const uint8_t steps_for_quant_level[12] { 2, 3, 4, 5, 6, 8, 10, 12, 16, 20, 24, 32 @@ -71,11 +71,11 @@ ASTCENC_ALIGNAS static float cos_table[SINCOS_STEPS][ANGULAR_STEPS]; /* See header for documentation. */ void prepare_angular_tables() { - for (unsigned int i = 0; i < ANGULAR_STEPS; i++) + for (size_t i = 0; i < ANGULAR_STEPS; i++) { float angle_step = static_cast(i + 1); - for (unsigned int j = 0; j < SINCOS_STEPS; j++) + for (size_t j = 0; j < SINCOS_STEPS; j++) { sin_table[j][i] = static_cast(sinf((2.0f * astc::PI / (SINCOS_STEPS - 1.0f)) * angle_step * static_cast(j))); cos_table[j][i] = static_cast(cosf((2.0f * astc::PI / (SINCOS_STEPS - 1.0f)) * angle_step * static_cast(j))); @@ -92,9 +92,9 @@ void prepare_angular_tables() * @param[out] offsets The output angular offsets array. */ static void compute_angular_offsets( - unsigned int weight_count, + size_t weight_count, const float* dec_weight_ideal_value, - unsigned int max_angular_steps, + size_t max_angular_steps, float* offsets ) { promise(weight_count > 0); @@ -103,7 +103,7 @@ static void compute_angular_offsets( ASTCENC_ALIGNAS int isamplev[BLOCK_MAX_WEIGHTS]; // Precompute isample; arrays are always allocated 64 elements long - for (unsigned int i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH) { // Ideal weight can be outside [0, 1] range, so clamp to fit table vfloat ideal_weight = clampzo(loada(dec_weight_ideal_value + i)); @@ -117,12 +117,12 @@ static void compute_angular_offsets( // Arrays are multiple of SIMD width (ANGULAR_STEPS), safe to overshoot max vfloat mult(1.0f / (2.0f * astc::PI)); - for (unsigned int i = 0; i < max_angular_steps; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < max_angular_steps; i += ASTCENC_SIMD_WIDTH) { vfloat anglesum_x = vfloat::zero(); vfloat anglesum_y = vfloat::zero(); - for (unsigned int j = 0; j < weight_count; j++) + for (size_t j = 0; j < weight_count; j++) { int isample = isamplev[j]; anglesum_x += loada(cos_table[isample] + i); @@ -154,10 +154,10 @@ static void compute_angular_offsets( * @param[out] cut_high_weight_error Per angular step, the high weight cut error. 
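The NEON and SVE mask() implementations above have no movemask instruction, so they select a per-lane power of two under the compare mask and horizontally add (or OR) the survivors. A scalar model of that idea for a 4-lane mask; the function name is illustrative.

#include <cstddef>

static size_t mask_from_lanes(const bool lanes[4])
{
    static const int lane_bit[4] { 0x1, 0x2, 0x4, 0x8 };

    size_t result = 0;
    for (size_t i = 0; i < 4; i++)
    {
        // Keep the lane's bit only where the compare was true
        result += lanes[i] ? lane_bit[i] : 0;
    }
    return result; // bit0 = lane 0, matching the SIMD versions
}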
*/ static void compute_lowest_and_highest_weight( - unsigned int weight_count, + size_t weight_count, const float* dec_weight_ideal_value, - unsigned int max_angular_steps, - unsigned int max_quant_steps, + size_t max_angular_steps, + size_t max_quant_steps, const float* offsets, float* lowest_weight, int* weight_span, @@ -177,7 +177,7 @@ static void compute_lowest_and_highest_weight( vfloat max_weight(-FLT_MAX); vint lane_id = vint::lane_id(); - for (unsigned int i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH) { vmask active = lane_id < vint(weight_count); lane_id += vint(ASTCENC_SIMD_WIDTH); @@ -191,7 +191,7 @@ static void compute_lowest_and_highest_weight( max_weight = hmax(max_weight); // Arrays are ANGULAR_STEPS long, so always safe to run full vectors - for (unsigned int sp = 0; sp < max_angular_steps; sp += ASTCENC_SIMD_WIDTH) + for (size_t sp = 0; sp < max_angular_steps; sp += ASTCENC_SIMD_WIDTH) { vfloat errval = vfloat::zero(); vfloat cut_low_weight_err = vfloat::zero(); @@ -203,7 +203,7 @@ static void compute_lowest_and_highest_weight( vfloat minidx = round(min_weight * rcp_stepsize - offset); vfloat maxidx = round(max_weight * rcp_stepsize - offset); - for (unsigned int j = 0; j < weight_count; j++) + for (size_t j = 0; j < weight_count; j++) { vfloat sval = load1(dec_weight_ideal_value + j) * rcp_stepsize - offset; vfloat svalrte = round(sval); @@ -250,14 +250,14 @@ static void compute_lowest_and_highest_weight( * @param[out] high_value Per angular step, the highest weight value. */ static void compute_angular_endpoints_for_quant_levels( - unsigned int weight_count, + size_t weight_count, const float* dec_weight_ideal_value, - unsigned int max_quant_level, + size_t max_quant_level, float low_value[TUNE_MAX_ANGULAR_QUANT + 1], float high_value[TUNE_MAX_ANGULAR_QUANT + 1] ) { - unsigned int max_quant_steps = steps_for_quant_level[max_quant_level]; - unsigned int max_angular_steps = steps_for_quant_level[max_quant_level]; + size_t max_quant_steps = steps_for_quant_level[max_quant_level]; + size_t max_angular_steps = steps_for_quant_level[max_quant_level]; ASTCENC_ALIGNAS float angular_offsets[ANGULAR_STEPS]; @@ -282,7 +282,7 @@ static void compute_angular_endpoints_for_quant_levels( // Initialize the array to some safe defaults promise(max_quant_steps > 0); - for (unsigned int i = 0; i < (max_quant_steps + 4); i++) + for (size_t i = 0; i < (max_quant_steps + 4); i++) { // Lane<0> = Best error // Lane<1> = Best scale; -1 indicates no solution found @@ -291,7 +291,7 @@ static void compute_angular_endpoints_for_quant_levels( } promise(max_angular_steps > 0); - for (unsigned int i = 0; i < max_angular_steps; i++) + for (size_t i = 0; i < max_angular_steps; i++) { float i_flt = static_cast(i); @@ -325,9 +325,9 @@ static void compute_angular_endpoints_for_quant_levels( best_results[idx_span - 2] = select(best_result, new_result, mask); } - for (unsigned int i = 0; i <= max_quant_level; i++) + for (size_t i = 0; i <= max_quant_level; i++) { - unsigned int q = steps_for_quant_level[i]; + size_t q = steps_for_quant_level[i]; int bsi = static_cast(best_results[q].lane<1>()); // Did we find anything? 
@@ -355,7 +355,7 @@ void compute_angular_endpoints_1plane( bool only_always, const block_size_descriptor& bsd, const float* dec_weight_ideal_value, - unsigned int max_weight_quant, + size_t max_weight_quant, compression_working_buffers& tmpbuf ) { float (&low_value)[WEIGHTS_MAX_BLOCK_MODES] = tmpbuf.weight_low_value1; @@ -364,10 +364,10 @@ void compute_angular_endpoints_1plane( float (&low_values)[WEIGHTS_MAX_DECIMATION_MODES][TUNE_MAX_ANGULAR_QUANT + 1] = tmpbuf.weight_low_values1; float (&high_values)[WEIGHTS_MAX_DECIMATION_MODES][TUNE_MAX_ANGULAR_QUANT + 1] = tmpbuf.weight_high_values1; - unsigned int max_decimation_modes = only_always ? bsd.decimation_mode_count_always + size_t max_decimation_modes = only_always ? bsd.decimation_mode_count_always : bsd.decimation_mode_count_selected; promise(max_decimation_modes > 0); - for (unsigned int i = 0; i < max_decimation_modes; i++) + for (size_t i = 0; i < max_decimation_modes; i++) { const decimation_mode& dm = bsd.decimation_modes[i]; if (!dm.is_ref_1plane(static_cast(max_weight_quant))) @@ -375,9 +375,9 @@ void compute_angular_endpoints_1plane( continue; } - unsigned int weight_count = bsd.get_decimation_info(i).weight_count; + size_t weight_count = bsd.get_decimation_info(i).weight_count; - unsigned int max_precision = dm.maxprec_1plane; + size_t max_precision = dm.maxprec_1plane; if (max_precision > TUNE_MAX_ANGULAR_QUANT) { max_precision = TUNE_MAX_ANGULAR_QUANT; @@ -394,16 +394,16 @@ void compute_angular_endpoints_1plane( max_precision, low_values[i], high_values[i]); } - unsigned int max_block_modes = only_always ? bsd.block_mode_count_1plane_always + size_t max_block_modes = only_always ? bsd.block_mode_count_1plane_always : bsd.block_mode_count_1plane_selected; promise(max_block_modes > 0); - for (unsigned int i = 0; i < max_block_modes; i++) + for (size_t i = 0; i < max_block_modes; i++) { const block_mode& bm = bsd.block_modes[i]; assert(!bm.is_dual_plane); - unsigned int quant_mode = bm.quant_mode; - unsigned int decim_mode = bm.decimation_mode; + size_t quant_mode = bm.quant_mode; + size_t decim_mode = bm.decimation_mode; if (quant_mode <= TUNE_MAX_ANGULAR_QUANT) { @@ -422,7 +422,7 @@ void compute_angular_endpoints_1plane( void compute_angular_endpoints_2planes( const block_size_descriptor& bsd, const float* dec_weight_ideal_value, - unsigned int max_weight_quant, + size_t max_weight_quant, compression_working_buffers& tmpbuf ) { float (&low_value1)[WEIGHTS_MAX_BLOCK_MODES] = tmpbuf.weight_low_value1; @@ -436,7 +436,7 @@ void compute_angular_endpoints_2planes( float (&high_values2)[WEIGHTS_MAX_DECIMATION_MODES][TUNE_MAX_ANGULAR_QUANT + 1] = tmpbuf.weight_high_values2; promise(bsd.decimation_mode_count_selected > 0); - for (unsigned int i = 0; i < bsd.decimation_mode_count_selected; i++) + for (size_t i = 0; i < bsd.decimation_mode_count_selected; i++) { const decimation_mode& dm = bsd.decimation_modes[i]; if (!dm.is_ref_2plane(static_cast(max_weight_quant))) @@ -444,9 +444,9 @@ void compute_angular_endpoints_2planes( continue; } - unsigned int weight_count = bsd.get_decimation_info(i).weight_count; + size_t weight_count = bsd.get_decimation_info(i).weight_count; - unsigned int max_precision = dm.maxprec_2planes; + size_t max_precision = dm.maxprec_2planes; if (max_precision > TUNE_MAX_ANGULAR_QUANT) { max_precision = TUNE_MAX_ANGULAR_QUANT; @@ -468,13 +468,13 @@ void compute_angular_endpoints_2planes( max_precision, low_values2[i], high_values2[i]); } - unsigned int start = bsd.block_mode_count_1plane_selected; - unsigned int 
end = bsd.block_mode_count_1plane_2plane_selected; - for (unsigned int i = start; i < end; i++) + size_t start = bsd.block_mode_count_1plane_selected; + size_t end = bsd.block_mode_count_1plane_2plane_selected; + for (size_t i = start; i < end; i++) { const block_mode& bm = bsd.block_modes[i]; - unsigned int quant_mode = bm.quant_mode; - unsigned int decim_mode = bm.decimation_mode; + size_t quant_mode = bm.quant_mode; + size_t decim_mode = bm.decimation_mode; if (quant_mode <= TUNE_MAX_ANGULAR_QUANT) { diff --git a/Source/astcenccli_error_metrics.cpp b/Source/astcenccli_error_metrics.cpp index 4e01a9ee..82eecc20 100644 --- a/Source/astcenccli_error_metrics.cpp +++ b/Source/astcenccli_error_metrics.cpp @@ -126,30 +126,30 @@ void compute_error_metrics( double mean_angular_errorsum = 0.0; double worst_angular_errorsum = 0.0; - unsigned int dim_x = astc::min(img1->dim_x, img2->dim_x); - unsigned int dim_y = astc::min(img1->dim_y, img2->dim_y); - unsigned int dim_z = astc::min(img1->dim_z, img2->dim_z); + size_t dim_x = astc::min(img1->dim_x, img2->dim_x); + size_t dim_y = astc::min(img1->dim_y, img2->dim_y); + size_t dim_z = astc::min(img1->dim_z, img2->dim_z); if (img1->dim_x != img2->dim_x || img1->dim_y != img2->dim_y || img1->dim_z != img2->dim_z) { printf("WARNING: Only intersection of images will be compared:\n" - " Image 1: %dx%dx%d\n" - " Image 2: %dx%dx%d\n", + " Image 1: %zux%zux%zu\n" + " Image 2: %zux%zux%zu\n", img1->dim_x, img1->dim_y, img1->dim_z, img2->dim_x, img2->dim_y, img2->dim_z); } double rgb_peak = 0.0; - unsigned int xsize1 = img1->dim_x; - unsigned int xsize2 = img2->dim_x; + size_t xsize1 = img1->dim_x; + size_t xsize2 = img2->dim_x; - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { vfloat4 color1; vfloat4 color2; diff --git a/Source/astcenccli_image.cpp b/Source/astcenccli_image.cpp index 237da60c..cbb78d80 100644 --- a/Source/astcenccli_image.cpp +++ b/Source/astcenccli_image.cpp @@ -26,10 +26,10 @@ /* See header for documentation. */ astcenc_image *alloc_image( - unsigned int bitness, - unsigned int dim_x, - unsigned int dim_y, - unsigned int dim_z + size_t bitness, + size_t dim_x, + size_t dim_y, + size_t dim_z ) { astcenc_image *img = new astcenc_image; img->dim_x = dim_x; @@ -42,7 +42,7 @@ astcenc_image *alloc_image( if (bitness == 8) { img->data_type = ASTCENC_TYPE_U8; - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { data[z] = new uint8_t[dim_x * dim_y * 4]; } @@ -50,7 +50,7 @@ astcenc_image *alloc_image( else if (bitness == 16) { img->data_type = ASTCENC_TYPE_F16; - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { data[z] = new uint16_t[dim_x * dim_y * 4]; } @@ -59,7 +59,7 @@ astcenc_image *alloc_image( { assert(bitness == 32); img->data_type = ASTCENC_TYPE_F32; - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { data[z] = new float[dim_x * dim_y * 4]; } @@ -76,7 +76,7 @@ void free_image(astcenc_image * img) return; } - for (unsigned int z = 0; z < img->dim_z; z++) + for (size_t z = 0; z < img->dim_z; z++) { delete[] reinterpret_cast(img->data[z]); } @@ -88,9 +88,9 @@ void free_image(astcenc_image * img) /* See header for documentation. 
*/ int determine_image_components(const astcenc_image * img) { - unsigned int dim_x = img->dim_x; - unsigned int dim_y = img->dim_y; - unsigned int dim_z = img->dim_z; + size_t dim_x = img->dim_x; + size_t dim_y = img->dim_y; + size_t dim_z = img->dim_z; // Scan through the image data to determine how many color components the image has bool is_luma = true; @@ -98,13 +98,13 @@ int determine_image_components(const astcenc_image * img) if (img->data_type == ASTCENC_TYPE_U8) { - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { uint8_t* data8 = static_cast(img->data[z]); - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { int r = data8[(4 * dim_x * y) + (4 * x )]; int g = data8[(4 * dim_x * y) + (4 * x + 1)]; @@ -119,13 +119,13 @@ int determine_image_components(const astcenc_image * img) } else if (img->data_type == ASTCENC_TYPE_F16) { - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { uint16_t* data16 = static_cast(img->data[z]); - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { int r = data16[(4 * dim_x * y) + (4 * x )]; int g = data16[(4 * dim_x * y) + (4 * x + 1)]; @@ -143,13 +143,13 @@ int determine_image_components(const astcenc_image * img) { assert(img->data_type == ASTCENC_TYPE_F32); - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { float* data32 = static_cast(img->data[z]); - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { float r = data32[(4 * dim_x * y) + (4 * x )]; float g = data32[(4 * dim_x * y) + (4 * x + 1)]; @@ -170,19 +170,19 @@ int determine_image_components(const astcenc_image * img) /* See header for documentation. */ astcenc_image* astc_img_from_floatx4_array( const float* data, - unsigned int dim_x, - unsigned int dim_y, + size_t dim_x, + size_t dim_y, bool y_flip ) { astcenc_image* img = alloc_image(16, dim_x, dim_y, 1); - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { uint16_t* data16 = static_cast(img->data[0]); - unsigned int y_src = y_flip ? (dim_y - y - 1) : y; + size_t y_src = y_flip ? (dim_y - y - 1) : y; const float* src = data + 4 * dim_x * y_src; - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { vint4 colorf16 = float_to_float16(vfloat4( src[4 * x ], @@ -204,19 +204,19 @@ astcenc_image* astc_img_from_floatx4_array( /* See header for documentation. */ astcenc_image* astc_img_from_unorm8x4_array( const uint8_t* data, - unsigned int dim_x, - unsigned int dim_y, + size_t dim_x, + size_t dim_y, bool y_flip ) { astcenc_image* img = alloc_image(8, dim_x, dim_y, 1); - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { uint8_t* data8 = static_cast(img->data[0]); - unsigned int y_src = y_flip ? (dim_y - y - 1) : y; + size_t y_src = y_flip ? 
(dim_y - y - 1) : y; const uint8_t* src = data + 4 * dim_x * y_src; - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { data8[(4 * dim_x * y) + (4 * x )] = src[4 * x ]; data8[(4 * dim_x * y) + (4 * x + 1)] = src[4 * x + 1]; @@ -234,10 +234,10 @@ astcenc_image* astc_img_from_unorm8x4_array( float* floatx4_array_from_astc_img( const astcenc_image* img, bool y_flip, - unsigned int z_index + size_t z_index ) { - unsigned int dim_x = img->dim_x; - unsigned int dim_y = img->dim_y; + size_t dim_x = img->dim_x; + size_t dim_y = img->dim_y; float *buf = new float[4 * dim_x * dim_y]; assert(z_index < img->dim_z); @@ -245,12 +245,12 @@ float* floatx4_array_from_astc_img( if (img->data_type == ASTCENC_TYPE_U8) { uint8_t* data8 = static_cast(img->data[z_index]); - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { - unsigned int ymod = y_flip ? dim_y - y - 1 : y; + size_t ymod = y_flip ? dim_y - y - 1 : y; float* dst = buf + y * dim_x * 4; - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { dst[4 * x ] = data8[(4 * dim_x * ymod) + (4 * x )] * (1.0f / 255.0f); dst[4 * x + 1] = data8[(4 * dim_x * ymod) + (4 * x + 1)] * (1.0f / 255.0f); @@ -262,12 +262,12 @@ float* floatx4_array_from_astc_img( else if (img->data_type == ASTCENC_TYPE_F16) { uint16_t* data16 = static_cast(img->data[z_index]); - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { - unsigned int ymod = y_flip ? dim_y - y - 1 : y; + size_t ymod = y_flip ? dim_y - y - 1 : y; float *dst = buf + y * dim_x * 4; - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { vint4 colori( data16[(4 * dim_x * ymod) + (4 * x )], @@ -285,12 +285,12 @@ float* floatx4_array_from_astc_img( { assert(img->data_type == ASTCENC_TYPE_F32); float* data32 = static_cast(img->data[z_index]); - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { - unsigned int ymod = y_flip ? dim_y - y - 1 : y; + size_t ymod = y_flip ? dim_y - y - 1 : y; float *dst = buf + y * dim_x * 4; - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { dst[4 * x ] = data32[(4 * dim_x * ymod) + (4 * x )]; dst[4 * x + 1] = data32[(4 * dim_x * ymod) + (4 * x + 1)]; @@ -308,19 +308,19 @@ uint8_t* unorm8x4_array_from_astc_img( const astcenc_image* img, bool y_flip ) { - unsigned int dim_x = img->dim_x; - unsigned int dim_y = img->dim_y; + size_t dim_x = img->dim_x; + size_t dim_y = img->dim_y; uint8_t* buf = new uint8_t[4 * dim_x * dim_y]; if (img->data_type == ASTCENC_TYPE_U8) { uint8_t* data8 = static_cast(img->data[0]); - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { - unsigned int ymod = y_flip ? dim_y - y - 1 : y; + size_t ymod = y_flip ? dim_y - y - 1 : y; uint8_t* dst = buf + y * dim_x * 4; - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { dst[4 * x ] = data8[(4 * dim_x * ymod) + (4 * x )]; dst[4 * x + 1] = data8[(4 * dim_x * ymod) + (4 * x + 1)]; @@ -332,12 +332,12 @@ uint8_t* unorm8x4_array_from_astc_img( else if (img->data_type == ASTCENC_TYPE_F16) { uint16_t* data16 = static_cast(img->data[0]); - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { - unsigned int ymod = y_flip ? dim_y - y - 1 : y; + size_t ymod = y_flip ? 
dim_y - y - 1 : y; uint8_t* dst = buf + y * dim_x * 4; - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { vint4 colori( data16[(4 * dim_x * ymod) + (4 * x )], @@ -358,12 +358,12 @@ uint8_t* unorm8x4_array_from_astc_img( { assert(img->data_type == ASTCENC_TYPE_F32); float* data32 = static_cast(img->data[0]); - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { - unsigned int ymod = y_flip ? dim_y - y - 1 : y; + size_t ymod = y_flip ? dim_y - y - 1 : y; uint8_t* dst = buf + y * dim_x * 4; - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { dst[4 * x ] = static_cast(astc::flt2int_rtn(astc::clamp1f(data32[(4 * dim_x * ymod) + (4 * x )]) * 255.0f)); dst[4 * x + 1] = static_cast(astc::flt2int_rtn(astc::clamp1f(data32[(4 * dim_x * ymod) + (4 * x + 1)]) * 255.0f)); diff --git a/Source/astcenccli_image_external.cpp b/Source/astcenccli_image_external.cpp index e41b9abf..3d548071 100644 --- a/Source/astcenccli_image_external.cpp +++ b/Source/astcenccli_image_external.cpp @@ -84,7 +84,7 @@ astcenc_image* load_png_with_wuffs( const char* filename, bool y_flip, bool& is_hdr, - unsigned int& component_count + size_t& component_count ) { is_hdr = false; component_count = 4; diff --git a/Source/astcenccli_image_load_store.cpp b/Source/astcenccli_image_load_store.cpp index 41ac4de9..af76450b 100644 --- a/Source/astcenccli_image_load_store.cpp +++ b/Source/astcenccli_image_load_store.cpp @@ -46,7 +46,7 @@ static std::string get_output_filename( const astcenc_image* img, const char* filename, - unsigned int index + size_t index ) { if (img->dim_z <= 1) { @@ -83,7 +83,7 @@ static astcenc_image* load_image_with_tinyexr( const char* filename, bool y_flip, bool& is_hdr, - unsigned int& component_count + size_t& component_count ) { int dim_x, dim_y; float* image; @@ -119,7 +119,7 @@ static astcenc_image* load_image_with_stb( const char* filename, bool y_flip, bool& is_hdr, - unsigned int& component_count + size_t& component_count ) { int dim_x, dim_y; @@ -168,7 +168,7 @@ static bool store_exr_image_with_tinyexr( ) { int res { 0 }; - for (unsigned int i = 0; i < img->dim_z; i++) + for (size_t i = 0; i < img->dim_z; i++) { std::string fnmod = get_output_filename(img, filename, i); float* buf = floatx4_array_from_astc_img(img, y_flip, i); @@ -202,7 +202,7 @@ static bool store_png_image_with_stb( assert(img->data_type == ASTCENC_TYPE_U8); - for (unsigned int i = 0; i < img->dim_z; i++) + for (size_t i = 0; i < img->dim_z; i++) { std::string fnmod = get_output_filename(img, filename, i); uint8_t* buf = reinterpret_cast(img->data[i]); @@ -236,7 +236,7 @@ static bool store_tga_image_with_stb( assert(img->data_type == ASTCENC_TYPE_U8); - for (unsigned int i = 0; i < img->dim_z; i++) + for (size_t i = 0; i < img->dim_z; i++) { std::string fnmod = get_output_filename(img, filename, i); uint8_t* buf = reinterpret_cast(img->data[i]); @@ -270,7 +270,7 @@ static bool store_bmp_image_with_stb( assert(img->data_type == ASTCENC_TYPE_U8); - for (unsigned int i = 0; i < img->dim_z; i++) + for (size_t i = 0; i < img->dim_z; i++) { std::string fnmod = get_output_filename(img, filename, i); uint8_t* buf = reinterpret_cast(img->data[i]); @@ -302,7 +302,7 @@ static bool store_hdr_image_with_stb( ) { int res { 0 }; - for (unsigned int i = 0; i < img->dim_z; i++) + for (size_t i = 0; i < img->dim_z; i++) { std::string fnmod = get_output_filename(img, filename, i); float* buf = floatx4_array_from_astc_img(img, y_flip, i); @@ -787,11 +787,11 @@ static 
uint32_t u32_byterev(uint32_t v) struct format_entry { - unsigned int x; - unsigned int y; - unsigned int z; + size_t x; + size_t y; + size_t z; bool is_srgb; - unsigned int format; + size_t format; }; static const std::array ASTC_FORMATS = @@ -851,7 +851,7 @@ static const std::array ASTC_FORMATS = }}; static const format_entry* get_format( - unsigned int format + size_t format ) { for (auto& it : ASTC_FORMATS) { @@ -863,10 +863,10 @@ static const format_entry* get_format( return nullptr; } -static unsigned int get_format( - unsigned int x, - unsigned int y, - unsigned int z, +static size_t get_format( + size_t x, + size_t y, + size_t z, bool is_srgb ) { for (auto& it : ASTC_FORMATS) @@ -935,7 +935,7 @@ static astcenc_image* load_ktx_uncompressed_image( const char* filename, bool y_flip, bool& is_hdr, - unsigned int& component_count + size_t& component_count ) { FILE *f = fopen(filename, "rb"); if (!f) @@ -1173,9 +1173,9 @@ static astcenc_image* load_ktx_uncompressed_image( } - unsigned int dim_x = hdr.pixel_width; - unsigned int dim_y = astc::max(hdr.pixel_height, 1u); - unsigned int dim_z = astc::max(hdr.pixel_depth, 1u); + size_t dim_x = hdr.pixel_width; + size_t dim_y = astc::max(hdr.pixel_height, 1u); + size_t dim_z = astc::max(hdr.pixel_depth, 1u); // ignore the key/value data fseek(f, hdr.bytes_of_key_value_data, SEEK_CUR); @@ -1232,12 +1232,12 @@ static astcenc_image* load_ktx_uncompressed_image( // Transfer data from the surface to our own image data structure astcenc_image *astc_img = alloc_image(bitness, dim_x, dim_y, dim_z); - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { - unsigned int ymod = y_flip ? dim_y - y - 1 : y; - unsigned int ydst = ymod; + size_t ymod = y_flip ? dim_y - y - 1 : y; + size_t ydst = ymod; void *dst; if (astc_img->data_type == ASTCENC_TYPE_U8) @@ -1335,7 +1335,7 @@ bool load_ktx_compressed_image( } // Read the length of the data and endianess convert - unsigned int data_len; + size_t data_len; actual = fread(&data_len, 1, sizeof(data_len), f); if (actual != sizeof(data_len)) { @@ -1391,7 +1391,7 @@ bool store_ktx_compressed_image( const char* filename, bool is_srgb ) { - unsigned int fmt = get_format(img.block_x, img.block_y, img.block_z, is_srgb); + size_t fmt = get_format(img.block_x, img.block_y, img.block_z, is_srgb); ktx_header hdr; memcpy(hdr.magic, ktx_magic, 12); @@ -1445,9 +1445,9 @@ static bool store_ktx_uncompressed_image( const char* filename, int y_flip ) { - unsigned int dim_x = img->dim_x; - unsigned int dim_y = img->dim_y; - unsigned int dim_z = img->dim_z; + size_t dim_x = img->dim_x; + size_t dim_y = img->dim_y; + size_t dim_z = img->dim_z; int bitness = img->data_type == ASTCENC_TYPE_U8 ? 
8 : 16; int image_components = determine_image_components(img); @@ -1497,43 +1497,43 @@ static bool store_ktx_uncompressed_image( row_pointers8[0] = new uint8_t *[dim_y * dim_z]; row_pointers8[0][0] = new uint8_t[dim_x * dim_y * dim_z * image_components + 3]; - for (unsigned int z = 1; z < dim_z; z++) + for (size_t z = 1; z < dim_z; z++) { row_pointers8[z] = row_pointers8[0] + dim_y * z; row_pointers8[z][0] = row_pointers8[0][0] + dim_y * dim_x * image_components * z; } - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { - for (unsigned int y = 1; y < dim_y; y++) + for (size_t y = 1; y < dim_y; y++) { row_pointers8[z][y] = row_pointers8[z][0] + dim_x * image_components * y; } } - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { uint8_t* data8 = static_cast(img->data[z]); - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { int ym = y_flip ? dim_y - y - 1 : y; switch (image_components) { case 1: // single-component, treated as Luminance - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { row_pointers8[z][y][x] = data8[(4 * dim_x * ym) + (4 * x )]; } break; case 2: // two-component, treated as Luminance-Alpha - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { row_pointers8[z][y][2 * x ] = data8[(4 * dim_x * ym) + (4 * x )]; row_pointers8[z][y][2 * x + 1] = data8[(4 * dim_x * ym) + (4 * x + 3)]; } break; case 3: // three-component, treated a - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { row_pointers8[z][y][3 * x ] = data8[(4 * dim_x * ym) + (4 * x )]; row_pointers8[z][y][3 * x + 1] = data8[(4 * dim_x * ym) + (4 * x + 1)]; @@ -1541,7 +1541,7 @@ static bool store_ktx_uncompressed_image( } break; case 4: // four-component, treated as RGBA - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { row_pointers8[z][y][4 * x ] = data8[(4 * dim_x * ym) + (4 * x )]; row_pointers8[z][y][4 * x + 1] = data8[(4 * dim_x * ym) + (4 * x + 1)]; @@ -1559,43 +1559,43 @@ static bool store_ktx_uncompressed_image( row_pointers16[0] = new uint16_t *[dim_y * dim_z]; row_pointers16[0][0] = new uint16_t[dim_x * dim_y * dim_z * image_components + 1]; - for (unsigned int z = 1; z < dim_z; z++) + for (size_t z = 1; z < dim_z; z++) { row_pointers16[z] = row_pointers16[0] + dim_y * z; row_pointers16[z][0] = row_pointers16[0][0] + dim_y * dim_x * image_components * z; } - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { - for (unsigned int y = 1; y < dim_y; y++) + for (size_t y = 1; y < dim_y; y++) { row_pointers16[z][y] = row_pointers16[z][0] + dim_x * image_components * y; } } - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { uint16_t* data16 = static_cast(img->data[z]); - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { int ym = y_flip ? 
dim_y - y - 1 : y; switch (image_components) { case 1: // single-component, treated as Luminance - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { row_pointers16[z][y][x] = data16[(4 * dim_x * ym) + (4 * x )]; } break; case 2: // two-component, treated as Luminance-Alpha - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { row_pointers16[z][y][2 * x ] = data16[(4 * dim_x * ym) + (4 * x )]; row_pointers16[z][y][2 * x + 1] = data16[(4 * dim_x * ym) + (4 * x + 3)]; } break; case 3: // three-component, treated as RGB - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { row_pointers16[z][y][3 * x ] = data16[(4 * dim_x * ym) + (4 * x )]; row_pointers16[z][y][3 * x + 1] = data16[(4 * dim_x * ym) + (4 * x + 1)]; @@ -1603,7 +1603,7 @@ static bool store_ktx_uncompressed_image( } break; case 4: // four-component, treated as RGBA - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { row_pointers16[z][y][4 * x ] = data16[(4 * dim_x * ym) + (4 * x )]; row_pointers16[z][y][4 * x + 1] = data16[(4 * dim_x * ym) + (4 * x + 1)]; @@ -1760,7 +1760,7 @@ static astcenc_image* load_dds_uncompressed_image( const char* filename, bool y_flip, bool& is_hdr, - unsigned int& component_count + size_t& component_count ) { FILE *f = fopen(filename, "rb"); if (!f) @@ -1817,9 +1817,9 @@ static astcenc_image* load_dds_uncompressed_image( } } - unsigned int dim_x = hdr.width; - unsigned int dim_y = hdr.height; - unsigned int dim_z = (hdr.flags & 0x800000) ? hdr.depth : 1; + size_t dim_x = hdr.width; + size_t dim_y = hdr.height; + size_t dim_z = (hdr.flags & 0x800000) ? hdr.depth : 1; // The bitcount that we will use internally in the codec int bitness = 0; @@ -2007,12 +2007,12 @@ static astcenc_image* load_dds_uncompressed_image( // then transfer data from the surface to our own image-data-structure. astcenc_image *astc_img = alloc_image(bitness, dim_x, dim_y, dim_z); - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { - unsigned int ymod = y_flip ? dim_y - y - 1 : y; - unsigned int ydst = ymod; + size_t ymod = y_flip ? dim_y - y - 1 : y; + size_t ydst = ymod; void* dst; if (astc_img->data_type == ASTCENC_TYPE_U8) @@ -2052,9 +2052,9 @@ static bool store_dds_uncompressed_image( const char* filename, int y_flip ) { - unsigned int dim_x = img->dim_x; - unsigned int dim_y = img->dim_y; - unsigned int dim_z = img->dim_z; + size_t dim_x = img->dim_x; + size_t dim_y = img->dim_y; + size_t dim_z = img->dim_z; int bitness = img->data_type == ASTCENC_TYPE_U8 ? 8 : 16; int image_components = (bitness == 16) ? 
4 : determine_image_components(img); @@ -2089,7 +2089,7 @@ static bool store_dds_uncompressed_image( hdr.pitch_or_linear_size = image_components * (bitness / 8) * dim_x; hdr.depth = dim_z; hdr.mipmapcount = 1; - for (unsigned int i = 0; i < 11; i++) + for (size_t i = 0; i < 11; i++) { hdr.reserved1[i] = 0; } @@ -2126,44 +2126,44 @@ static bool store_dds_uncompressed_image( row_pointers8[0] = new uint8_t *[dim_y * dim_z]; row_pointers8[0][0] = new uint8_t[dim_x * dim_y * dim_z * image_components]; - for (unsigned int z = 1; z < dim_z; z++) + for (size_t z = 1; z < dim_z; z++) { row_pointers8[z] = row_pointers8[0] + dim_y * z; row_pointers8[z][0] = row_pointers8[0][0] + dim_y * dim_z * image_components * z; } - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { - for (unsigned int y = 1; y < dim_y; y++) + for (size_t y = 1; y < dim_y; y++) { row_pointers8[z][y] = row_pointers8[z][0] + dim_x * image_components * y; } } - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { uint8_t* data8 = static_cast(img->data[z]); - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { int ym = y_flip ? dim_y - y - 1 : y; switch (image_components) { case 1: // single-component, treated as Luminance - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { row_pointers8[z][y][x] = data8[(4 * dim_x * ym) + (4 * x )]; } break; case 2: // two-component, treated as Luminance-Alpha - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { row_pointers8[z][y][2 * x ] = data8[(4 * dim_x * ym) + (4 * x )]; row_pointers8[z][y][2 * x + 1] = data8[(4 * dim_x * ym) + (4 * x + 3)]; } break; case 3: // three-component, treated as RGB - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { row_pointers8[z][y][3 * x ] = data8[(4 * dim_x * ym) + (4 * x )]; row_pointers8[z][y][3 * x + 1] = data8[(4 * dim_x * ym) + (4 * x + 1)]; @@ -2171,7 +2171,7 @@ static bool store_dds_uncompressed_image( } break; case 4: // four-component, treated as RGBA - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { row_pointers8[z][y][4 * x ] = data8[(4 * dim_x * ym) + (4 * x )]; row_pointers8[z][y][4 * x + 1] = data8[(4 * dim_x * ym) + (4 * x + 1)]; @@ -2189,44 +2189,44 @@ static bool store_dds_uncompressed_image( row_pointers16[0] = new uint16_t *[dim_y * dim_z]; row_pointers16[0][0] = new uint16_t[dim_x * dim_y * dim_z * image_components]; - for (unsigned int z = 1; z < dim_z; z++) + for (size_t z = 1; z < dim_z; z++) { row_pointers16[z] = row_pointers16[0] + dim_y * z; row_pointers16[z][0] = row_pointers16[0][0] + dim_y * dim_x * image_components * z; } - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { - for (unsigned int y = 1; y < dim_y; y++) + for (size_t y = 1; y < dim_y; y++) { row_pointers16[z][y] = row_pointers16[z][0] + dim_x * image_components * y; } } - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { uint16_t* data16 = static_cast(img->data[z]); - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { int ym = y_flip ? 
dim_y - y - 1: y; switch (image_components) { case 1: // single-component, treated as Luminance - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { row_pointers16[z][y][x] = data16[(4 * dim_x * ym) + (4 * x )]; } break; case 2: // two-component, treated as Luminance-Alpha - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { row_pointers16[z][y][2 * x ] = data16[(4 * dim_x * ym) + (4 * x )]; row_pointers16[z][y][2 * x + 1] = data16[(4 * dim_x * ym) + (4 * x + 3)]; } break; case 3: // three-component, treated as RGB - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { row_pointers16[z][y][3 * x ] = data16[(4 * dim_x * ym) + (4 * x )]; row_pointers16[z][y][3 * x + 1] = data16[(4 * dim_x * ym) + (4 * x + 1)]; @@ -2234,7 +2234,7 @@ static bool store_dds_uncompressed_image( } break; case 4: // four-component, treated as RGBA - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { row_pointers16[z][y][4 * x ] = data16[(4 * dim_x * ym) + (4 * x )]; row_pointers16[z][y][4 * x + 1] = data16[(4 * dim_x * ym) + (4 * x + 1)]; @@ -2311,7 +2311,7 @@ static const struct { const char* ending1; const char* ending2; - astcenc_image* (*loader_func)(const char*, bool, bool&, unsigned int&); + astcenc_image* (*loader_func)(const char*, bool, bool&, size_t&); } loader_descs[] { // LDR formats {".png", ".PNG", load_png_with_wuffs}, @@ -2377,7 +2377,7 @@ astcenc_image* load_ncimage( const char* filename, bool y_flip, bool& is_hdr, - unsigned int& component_count + size_t& component_count ) { // Get the file extension const char* eptr = strrchr(filename, '.'); @@ -2387,7 +2387,7 @@ astcenc_image* load_ncimage( } // Scan through descriptors until a matching loader is found - for (unsigned int i = 0; i < loader_descr_count; i++) + for (size_t i = 0; i < loader_descr_count; i++) { if (loader_descs[i].ending1 == nullptr || strcmp(eptr, loader_descs[i].ending1) == 0 @@ -2443,16 +2443,16 @@ struct astc_header static const uint32_t ASTC_MAGIC_ID = 0x5CA1AB13; -static unsigned int unpack_bytes( +static size_t unpack_bytes( uint8_t a, uint8_t b, uint8_t c, uint8_t d ) { - return (static_cast(a) ) + - (static_cast(b) << 8) + - (static_cast(c) << 16) + - (static_cast(d) << 24); + return (static_cast(a) ) + + (static_cast(b) << 8) + + (static_cast(c) << 16) + + (static_cast(d) << 24); } /* See header for documentation. 
*/ @@ -2475,7 +2475,7 @@ int load_cimage( return 1; } - unsigned int magicval = unpack_bytes(hdr.magic[0], hdr.magic[1], hdr.magic[2], hdr.magic[3]); + size_t magicval = unpack_bytes(hdr.magic[0], hdr.magic[1], hdr.magic[2], hdr.magic[3]); if (magicval != ASTC_MAGIC_ID) { print_error("ERROR: File not recognized '%s'\n", filename); @@ -2483,13 +2483,13 @@ int load_cimage( } // Ensure these are not zero to avoid div by zero - unsigned int block_x = astc::max(static_cast(hdr.block_x), 1u); - unsigned int block_y = astc::max(static_cast(hdr.block_y), 1u); - unsigned int block_z = astc::max(static_cast(hdr.block_z), 1u); + size_t block_x = astc::max(static_cast(hdr.block_x), 1_z); + size_t block_y = astc::max(static_cast(hdr.block_y), 1_z); + size_t block_z = astc::max(static_cast(hdr.block_z), 1_z); - unsigned int dim_x = unpack_bytes(hdr.dim_x[0], hdr.dim_x[1], hdr.dim_x[2], 0); - unsigned int dim_y = unpack_bytes(hdr.dim_y[0], hdr.dim_y[1], hdr.dim_y[2], 0); - unsigned int dim_z = unpack_bytes(hdr.dim_z[0], hdr.dim_z[1], hdr.dim_z[2], 0); + size_t dim_x = unpack_bytes(hdr.dim_x[0], hdr.dim_x[1], hdr.dim_x[2], 0); + size_t dim_y = unpack_bytes(hdr.dim_y[0], hdr.dim_y[1], hdr.dim_y[2], 0); + size_t dim_z = unpack_bytes(hdr.dim_z[0], hdr.dim_z[1], hdr.dim_z[2], 0); if (dim_x == 0 || dim_y == 0 || dim_z == 0) { @@ -2497,9 +2497,9 @@ int load_cimage( return 1; } - unsigned int xblocks = (dim_x + block_x - 1) / block_x; - unsigned int yblocks = (dim_y + block_y - 1) / block_y; - unsigned int zblocks = (dim_z + block_z - 1) / block_z; + size_t xblocks = (dim_x + block_x - 1) / block_x; + size_t yblocks = (dim_y + block_y - 1) / block_y; + size_t zblocks = (dim_z + block_z - 1) / block_z; size_t data_size = xblocks * yblocks * zblocks * 16; uint8_t *buffer = new uint8_t[data_size]; diff --git a/Source/astcenccli_internal.h b/Source/astcenccli_internal.h index ea3ae384..0043d27d 100644 --- a/Source/astcenccli_internal.h +++ b/Source/astcenccli_internal.h @@ -36,22 +36,22 @@ struct astc_compressed_image { /** @brief The block width in texels. */ - unsigned int block_x; + size_t block_x; /** @brief The block height in texels. */ - unsigned int block_y; + size_t block_y; /** @brief The block depth in texels. */ - unsigned int block_z; + size_t block_z; /** @brief The image width in texels. */ - unsigned int dim_x; + size_t dim_x; /** @brief The image height in texels. */ - unsigned int dim_y; + size_t dim_y; /** @brief The image depth in texels. */ - unsigned int dim_z; + size_t dim_z; /** @brief The binary data payload. */ uint8_t* data; @@ -66,13 +66,13 @@ struct astc_compressed_image struct cli_config_options { /** @brief The number of threads to use for processing. */ - unsigned int thread_count; + size_t thread_count; /** @brief The number of repeats to execute for benchmarking. */ - unsigned int repeat_count; + size_t repeat_count; /** @brief The number of image slices to load for a 3D image. */ - unsigned int array_size; + size_t array_size; /** @brief @c true if running in silent mode with minimal output. */ bool silentmode; @@ -130,7 +130,7 @@ astcenc_image* load_ncimage( const char* filename, bool y_flip, bool& is_hdr, - unsigned int& component_count); + size_t& component_count); /** * @brief Load uncompressed PNG image. @@ -146,7 +146,7 @@ astcenc_image* load_png_with_wuffs( const char* filename, bool y_flip, bool& is_hdr, - unsigned int& component_count); + size_t& component_count); /** * @brief Save an uncompressed image. 
@@ -189,10 +189,10 @@ int get_output_filename_enforced_bitness( * @return The allocated image, or @c nullptr on error. */ astcenc_image* alloc_image( - unsigned int bitness, - unsigned int dim_x, - unsigned int dim_y, - unsigned int dim_z); + size_t bitness, + size_t dim_x, + size_t dim_y, + size_t dim_z); /** * @brief Free an image. @@ -276,8 +276,8 @@ bool store_ktx_compressed_image( */ astcenc_image* astc_img_from_floatx4_array( const float* data, - unsigned int dim_x, - unsigned int dim_y, + size_t dim_x, + size_t dim_y, bool y_flip); /** @@ -292,8 +292,8 @@ astcenc_image* astc_img_from_floatx4_array( */ astcenc_image* astc_img_from_unorm8x4_array( const uint8_t* data, - unsigned int dim_x, - unsigned int dim_y, + size_t dim_x, + size_t dim_y, bool y_flip); /** @@ -310,7 +310,7 @@ astcenc_image* astc_img_from_unorm8x4_array( float* floatx4_array_from_astc_img( const astcenc_image* img, bool y_flip, - unsigned int z_index); + size_t z_index); /** * @brief Create a flattened RGBA UNORM8 data array from an image structure. diff --git a/Source/astcenccli_toplevel.cpp b/Source/astcenccli_toplevel.cpp index e71a6680..8dfcfda7 100644 --- a/Source/astcenccli_toplevel.cpp +++ b/Source/astcenccli_toplevel.cpp @@ -40,7 +40,7 @@ Data structure definitions ============================================================================ */ -typedef unsigned int astcenc_operation; +typedef size_t astcenc_operation; struct mode_entry { @@ -54,25 +54,25 @@ struct mode_entry ============================================================================ */ /** @brief Stage bit indicating we need to load a compressed image. */ -static const unsigned int ASTCENC_STAGE_LD_COMP = 1 << 0; +static const size_t ASTCENC_STAGE_LD_COMP = 1 << 0; /** @brief Stage bit indicating we need to store a compressed image. */ -static const unsigned int ASTCENC_STAGE_ST_COMP = 1 << 1; +static const size_t ASTCENC_STAGE_ST_COMP = 1 << 1; /** @brief Stage bit indicating we need to load an uncompressed image. */ -static const unsigned int ASTCENC_STAGE_LD_NCOMP = 1 << 2; +static const size_t ASTCENC_STAGE_LD_NCOMP = 1 << 2; /** @brief Stage bit indicating we need to store an uncompressed image. */ -static const unsigned int ASTCENC_STAGE_ST_NCOMP = 1 << 3; +static const size_t ASTCENC_STAGE_ST_NCOMP = 1 << 3; /** @brief Stage bit indicating we need compress an image. */ -static const unsigned int ASTCENC_STAGE_COMPRESS = 1 << 4; +static const size_t ASTCENC_STAGE_COMPRESS = 1 << 4; /** @brief Stage bit indicating we need to decompress an image. */ -static const unsigned int ASTCENC_STAGE_DECOMPRESS = 1 << 5; +static const size_t ASTCENC_STAGE_DECOMPRESS = 1 << 5; /** @brief Stage bit indicating we need to compare an image with the original input. */ -static const unsigned int ASTCENC_STAGE_COMPARE = 1 << 6; +static const size_t ASTCENC_STAGE_COMPARE = 1 << 6; /** @brief Operation indicating an unknown request (should never happen). 
*/ static const astcenc_operation ASTCENC_OP_UNKNOWN = 0; @@ -168,18 +168,18 @@ struct decompression_workload extern "C" void progress_emitter( float value ) { - const unsigned int bar_size = 25; - unsigned int parts = static_cast(value / 4.0f); + const size_t bar_size = 25; + size_t parts = static_cast(value / 4.0f); char buffer[bar_size + 3]; buffer[0] = '['; - for (unsigned int i = 0; i < parts; i++) + for (size_t i = 0; i < parts; i++) { buffer[i + 1] = '='; } - for (unsigned int i = parts; i < bar_size; i++) + for (size_t i = parts; i < bar_size; i++) { buffer[i + 1] = ' '; } @@ -293,7 +293,7 @@ static void decompression_workload_runner( */ static std::string get_slice_filename( const std::string& basename, - unsigned int index, + size_t index, bool& error ) { size_t sep = basename.find_last_of('.'); @@ -323,10 +323,10 @@ static std::string get_slice_filename( */ static astcenc_image* load_uncomp_file( const char* filename, - unsigned int dim_z, + size_t dim_z, bool y_flip, bool& is_hdr, - unsigned int& component_count + size_t& component_count ) { astcenc_image *image = nullptr; @@ -338,12 +338,12 @@ static astcenc_image* load_uncomp_file( else { bool slice_is_hdr; - unsigned int slice_component_count; + size_t slice_component_count; astcenc_image* slice = nullptr; std::vector slices; // For a 3D image load an array of slices - for (unsigned int image_index = 0; image_index < dim_z; image_index++) + for (size_t image_index = 0; image_index < dim_z; image_index++) { bool error; std::string slice_name = get_slice_filename(filename, image_index, error); @@ -396,15 +396,15 @@ static astcenc_image* load_uncomp_file( // If all slices loaded correctly then repack them into a single image if (slices.size() == dim_z) { - unsigned int dim_x = slices[0]->dim_x; - unsigned int dim_y = slices[0]->dim_y; + size_t dim_x = slices[0]->dim_x; + size_t dim_y = slices[0]->dim_y; int bitness = is_hdr ? 16 : 8; int slice_size = dim_x * dim_y; image = alloc_image(bitness, dim_x, dim_y, dim_z); // Combine 2D source images into one 3D image - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { if (image->data_type == ASTCENC_TYPE_U8) { @@ -503,9 +503,9 @@ static int init_astcenc_config( astcenc_preprocess& preprocess, astcenc_config& config ) { - unsigned int block_x = 0; - unsigned int block_y = 0; - unsigned int block_z = 1; + size_t block_x = 0; + size_t block_y = 0; + size_t block_z = 1; // For decode the block size is set by the incoming image. if (operation == ASTCENC_OP_DECOMPRESS) @@ -530,7 +530,7 @@ static int init_astcenc_config( } int cnt2D, cnt3D; - int dimensions = sscanf(argv[4], "%ux%u%nx%u%n", + int dimensions = sscanf(argv[4], "%zux%zu%nx%zu%n", &block_x, &block_y, &cnt2D, &block_z, &cnt3D); // Character after the last match should be a NUL if (!(((dimensions == 2) && !argv[4][cnt2D]) || ((dimensions == 3) && !argv[4][cnt3D]))) @@ -583,7 +583,7 @@ static int init_astcenc_config( argidx = 6; } - unsigned int flags = 0; + size_t flags = 0; // Gather the flags that we need while (argidx < argc) @@ -1152,7 +1152,7 @@ static int edit_astcenc_config( argidx++; // Read array size (image depth). 
- if (!sscanf(argv[argidx], "%u", &cli_config.array_size) || cli_config.array_size == 0) + if (!sscanf(argv[argidx], "%zu", &cli_config.array_size) || cli_config.array_size == 0) { print_error("ERROR: -zdim size '%s' is invalid\n", argv[argidx]); return 1; @@ -1243,36 +1243,36 @@ static void print_astcenc_config( if (config.block_z == 1) { - printf(" Block size: %ux%u\n", config.block_x, config.block_y); + printf(" Block size: %zux%zu\n", config.block_x, config.block_y); } else { - printf(" Block size: %ux%ux%u\n", config.block_x, config.block_y, config.block_z); + printf(" Block size: %zux%zux%zu\n", config.block_x, config.block_y, config.block_z); } printf(" Bitrate: %3.2f bpp\n", 128.0 / (config.block_x * config.block_y * config.block_z)); - printf(" RGB alpha scale weight: %d\n", (config.flags & ASTCENC_FLG_USE_ALPHA_WEIGHT)); + printf(" RGB alpha scale weight: %zu\n", (config.flags & ASTCENC_FLG_USE_ALPHA_WEIGHT)); if ((config.flags & ASTCENC_FLG_USE_ALPHA_WEIGHT)) { - printf(" Radius RGB alpha scale: %u texels\n", config.a_scale_radius); + printf(" Radius RGB alpha scale: %zu texels\n", config.a_scale_radius); } printf(" R component weight: %g\n", static_cast(config.cw_r_weight)); printf(" G component weight: %g\n", static_cast(config.cw_g_weight)); printf(" B component weight: %g\n", static_cast(config.cw_b_weight)); printf(" A component weight: %g\n", static_cast(config.cw_a_weight)); - printf(" Partition cutoff: %u partitions\n", config.tune_partition_count_limit); - printf(" 2 partition index cutoff: %u partition ids\n", config.tune_2partition_index_limit); - printf(" 3 partition index cutoff: %u partition ids\n", config.tune_3partition_index_limit); - printf(" 4 partition index cutoff: %u partition ids\n", config.tune_4partition_index_limit); + printf(" Partition cutoff: %zu partitions\n", config.tune_partition_count_limit); + printf(" 2 partition index cutoff: %zu partition ids\n", config.tune_2partition_index_limit); + printf(" 3 partition index cutoff: %zu partition ids\n", config.tune_3partition_index_limit); + printf(" 4 partition index cutoff: %zu partition ids\n", config.tune_4partition_index_limit); printf(" PSNR cutoff: %g dB\n", static_cast(config.tune_db_limit)); printf(" 3 partition cutoff: %g\n", static_cast(config.tune_2partition_early_out_limit_factor)); printf(" 4 partition cutoff: %g\n", static_cast(config.tune_3partition_early_out_limit_factor)); printf(" 2 plane correlation cutoff: %g\n", static_cast(config.tune_2plane_early_out_limit_correlation)); printf(" Block mode centile cutoff: %g%%\n", static_cast(config.tune_block_mode_limit)); - printf(" Candidate cutoff: %u candidates\n", config.tune_candidate_limit); - printf(" Refinement cutoff: %u iterations\n", config.tune_refinement_limit); - printf(" Compressor thread count: %d\n", cli_config.thread_count); + printf(" Candidate cutoff: %zu candidates\n", config.tune_candidate_limit); + printf(" Refinement cutoff: %zu iterations\n", config.tune_refinement_limit); + printf(" Compressor thread count: %zu\n", cli_config.thread_count); printf("\n"); } } @@ -1293,9 +1293,9 @@ static void print_astcenc_config( */ static vfloat4 image_get_pixel( const astcenc_image& img, - unsigned int x, - unsigned int y, - unsigned int z + size_t x, + size_t y, + size_t z ) { // We should never escape bounds assert(x < img.dim_x); @@ -1351,9 +1351,9 @@ static vfloat4 image_get_pixel( */ static void image_set_pixel( astcenc_image& img, - unsigned int x, - unsigned int y, - unsigned int z, + size_t x, + size_t y, + size_t z, vfloat4 
pixel ) { // We should never escape bounds @@ -1408,11 +1408,11 @@ static void image_preprocess_normalize( const astcenc_image& input, astcenc_image& output ) { - for (unsigned int z = 0; z < input.dim_z; z++) + for (size_t z = 0; z < input.dim_z; z++) { - for (unsigned int y = 0; y < input.dim_y; y++) + for (size_t y = 0; y < input.dim_y; y++) { - for (unsigned int x = 0; x < input.dim_x; x++) + for (size_t x = 0; x < input.dim_x; x++) { vfloat4 pixel = image_get_pixel(input, x, y, z); @@ -1487,11 +1487,11 @@ static void image_preprocess_premultiply( astcenc_image& output, astcenc_profile profile ) { - for (unsigned int z = 0; z < input.dim_z; z++) + for (size_t z = 0; z < input.dim_z; z++) { - for (unsigned int y = 0; y < input.dim_y; y++) + for (size_t y = 0; y < input.dim_y; y++) { - for (unsigned int x = 0; x < input.dim_x; x++) + for (size_t x = 0; x < input.dim_x; x++) { vfloat4 pixel = image_get_pixel(input, x, y, z); @@ -2011,7 +2011,7 @@ int astcenc_main( } astcenc_image* image_uncomp_in = nullptr ; - unsigned int image_uncomp_in_component_count = 0; + size_t image_uncomp_in_component_count = 0; bool image_uncomp_in_is_hdr = false; astcenc_image* image_decomp_out = nullptr; @@ -2126,15 +2126,15 @@ int astcenc_main( printf(" Color profile: %s\n", image_uncomp_in_is_hdr ? "HDR" : "LDR"); if (image_uncomp_in->dim_z > 1) { - printf(" Dimensions: 3D, %ux%ux%u\n", + printf(" Dimensions: 3D, %zux%zux%zu\n", image_uncomp_in->dim_x, image_uncomp_in->dim_y, image_uncomp_in->dim_z); } else { - printf(" Dimensions: 2D, %ux%u\n", + printf(" Dimensions: 2D, %zux%zu\n", image_uncomp_in->dim_x, image_uncomp_in->dim_y); } - printf(" Components: %d\n\n", image_uncomp_in_component_count); + printf(" Components: %zu\n\n", image_uncomp_in_component_count); } } @@ -2159,9 +2159,9 @@ int astcenc_main( { print_astcenc_config(cli_config, config); - unsigned int blocks_x = (image_uncomp_in->dim_x + config.block_x - 1) / config.block_x; - unsigned int blocks_y = (image_uncomp_in->dim_y + config.block_y - 1) / config.block_y; - unsigned int blocks_z = (image_uncomp_in->dim_z + config.block_z - 1) / config.block_z; + size_t blocks_x = (image_uncomp_in->dim_x + config.block_x - 1) / config.block_x; + size_t blocks_y = (image_uncomp_in->dim_y + config.block_y - 1) / config.block_y; + size_t blocks_z = (image_uncomp_in->dim_z + config.block_z - 1) / config.block_z; size_t buffer_size = blocks_x * blocks_y * blocks_z * 16; uint8_t* buffer = new uint8_t[buffer_size]; @@ -2176,7 +2176,7 @@ int astcenc_main( // Only launch worker threads for multi-threaded use - it makes basic // single-threaded profiling and debugging a little less convoluted double start_compression_time = get_time(); - for (unsigned int i = 0; i < cli_config.repeat_count; i++) + for (size_t i = 0; i < cli_config.repeat_count; i++) { if (config.progress_callback) { @@ -2244,7 +2244,7 @@ int astcenc_main( // Only launch worker threads for multi-threaded use - it makes basic // single-threaded profiling and debugging a little less convoluted double start_decompression_time = get_time(); - for (unsigned int i = 0; i < cli_config.repeat_count; i++) + for (size_t i = 0; i < cli_config.repeat_count; i++) { double start_iter_time = get_time(); if (cli_config.thread_count > 1) diff --git a/Source/astcenccli_toplevel_help.cpp b/Source/astcenccli_toplevel_help.cpp index f475b39c..56bbca1c 100644 --- a/Source/astcenccli_toplevel_help.cpp +++ b/Source/astcenccli_toplevel_help.cpp @@ -582,7 +582,7 @@ void astcenc_print_header() const char* f16ctype = ""; 
 #endif
 
-	unsigned int bits = static_cast<unsigned int>(sizeof(void*) * 8);
+	size_t bits = static_cast<size_t>(sizeof(void*) * 8);
 	printf(astcenc_copyright_string, VERSION_STRING, bits, simdtype, pcnttype, f16ctype, YEAR_STRING);
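(Aside, not part of the patch: an illustrative sketch of the matching caller-side conventions after the size_t migration. The block sizes and the parsed string are invented for the example; astcenc_config is the public structure whose fields change type in this diff.)

	// size_t struct fields are now printed with %zu rather than %u ...
	astcenc_config config {};
	config.block_x = 6;
	config.block_y = 6;
	config.block_z = 1;
	printf("Block size: %zux%zux%zu\n", config.block_x, config.block_y, config.block_z);

	// ... and parsed with %zu as well
	size_t array_size = 0;
	if (sscanf("4", "%zu", &array_size) == 1)
	{
		printf("Array size: %zu slices\n", array_size);
	}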