Skip to content

Switch to size_t part 1 #556

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 34 additions & 34 deletions Source/astcenc_averages_and_directions.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2011-2023 Arm Limited
// Copyright 2011-2025 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
Expand Down Expand Up @@ -50,7 +50,7 @@ static void compute_partition_averages_rgb(
vfloat4 averages[BLOCK_MAX_PARTITIONS]
) {
unsigned int partition_count = pi.partition_count;
unsigned int texel_count = blk.texel_count;
size_t texel_count = blk.texel_count;
promise(texel_count > 0);

// For 1 partition just use the precomputed mean
Expand All @@ -64,7 +64,7 @@ static void compute_partition_averages_rgb(
vfloatacc pp_avg_rgb[3] {};

vint lane_id = vint::lane_id();
for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH)
for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH)
{
vint texel_partition(pi.partition_of_texel + i);

Expand Down Expand Up @@ -100,7 +100,7 @@ static void compute_partition_averages_rgb(
vfloatacc pp_avg_rgb[2][3] {};

vint lane_id = vint::lane_id();
for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH)
for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH)
{
vint texel_partition(pi.partition_of_texel + i);

Expand Down Expand Up @@ -145,7 +145,7 @@ static void compute_partition_averages_rgb(
vfloatacc pp_avg_rgb[3][3] {};

vint lane_id = vint::lane_id();
for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH)
for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH)
{
vint texel_partition(pi.partition_of_texel + i);

Expand Down Expand Up @@ -221,7 +221,7 @@ static void compute_partition_averages_rgba(
vfloat4 averages[BLOCK_MAX_PARTITIONS]
) {
unsigned int partition_count = pi.partition_count;
unsigned int texel_count = blk.texel_count;
size_t texel_count = blk.texel_count;
promise(texel_count > 0);

// For 1 partition just use the precomputed mean
Expand All @@ -235,7 +235,7 @@ static void compute_partition_averages_rgba(
vfloat4 pp_avg_rgba[4] {};

vint lane_id = vint::lane_id();
for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH)
for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH)
{
vint texel_partition(pi.partition_of_texel + i);

Expand Down Expand Up @@ -275,7 +275,7 @@ static void compute_partition_averages_rgba(
vfloat4 pp_avg_rgba[2][4] {};

vint lane_id = vint::lane_id();
for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH)
for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH)
{
vint texel_partition(pi.partition_of_texel + i);

Expand Down Expand Up @@ -326,7 +326,7 @@ static void compute_partition_averages_rgba(
vfloat4 pp_avg_rgba[3][4] {};

vint lane_id = vint::lane_id();
for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH)
for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH)
{
vint texel_partition(pi.partition_of_texel + i);

Expand Down Expand Up @@ -390,17 +390,17 @@ void compute_avgs_and_dirs_4_comp(
const image_block& blk,
partition_metrics pm[BLOCK_MAX_PARTITIONS]
) {
int partition_count = pi.partition_count;
size_t partition_count = pi.partition_count;
promise(partition_count > 0);

// Pre-compute partition_averages
vfloat4 partition_averages[BLOCK_MAX_PARTITIONS];
compute_partition_averages_rgba(pi, blk, partition_averages);

for (int partition = 0; partition < partition_count; partition++)
for (size_t partition = 0; partition < partition_count; partition++)
{
const uint8_t *texel_indexes = pi.texels_of_partition[partition];
unsigned int texel_count = pi.partition_texel_count[partition];
size_t texel_count = pi.partition_texel_count[partition];
promise(texel_count > 0);

vfloat4 average = partition_averages[partition];
Expand All @@ -411,7 +411,7 @@ void compute_avgs_and_dirs_4_comp(
vfloat4 sum_zp = vfloat4::zero();
vfloat4 sum_wp = vfloat4::zero();

for (unsigned int i = 0; i < texel_count; i++)
for (size_t i = 0; i < texel_count; i++)
{
unsigned int iwt = texel_indexes[i];
vfloat4 texel_datum = blk.texel(iwt);
Expand Down Expand Up @@ -509,13 +509,13 @@ void compute_avgs_and_dirs_3_comp(
partition_averages[3] = partition_averages[3].swz<0, 1, 2>();
}

unsigned int partition_count = pi.partition_count;
size_t partition_count = pi.partition_count;
promise(partition_count > 0);

for (unsigned int partition = 0; partition < partition_count; partition++)
for (size_t partition = 0; partition < partition_count; partition++)
{
const uint8_t *texel_indexes = pi.texels_of_partition[partition];
unsigned int texel_count = pi.partition_texel_count[partition];
size_t texel_count = pi.partition_texel_count[partition];
promise(texel_count > 0);

vfloat4 average = partition_averages[partition];
Expand All @@ -525,7 +525,7 @@ void compute_avgs_and_dirs_3_comp(
vfloat4 sum_yp = vfloat4::zero();
vfloat4 sum_zp = vfloat4::zero();

for (unsigned int i = 0; i < texel_count; i++)
for (size_t i = 0; i < texel_count; i++)
{
unsigned int iwt = texel_indexes[i];

Expand Down Expand Up @@ -570,17 +570,17 @@ void compute_avgs_and_dirs_3_comp_rgb(
const image_block& blk,
partition_metrics pm[BLOCK_MAX_PARTITIONS]
) {
unsigned int partition_count = pi.partition_count;
size_t partition_count = pi.partition_count;
promise(partition_count > 0);

// Pre-compute partition_averages
vfloat4 partition_averages[BLOCK_MAX_PARTITIONS];
compute_partition_averages_rgb(pi, blk, partition_averages);

for (unsigned int partition = 0; partition < partition_count; partition++)
for (size_t partition = 0; partition < partition_count; partition++)
{
const uint8_t *texel_indexes = pi.texels_of_partition[partition];
unsigned int texel_count = pi.partition_texel_count[partition];
size_t texel_count = pi.partition_texel_count[partition];
promise(texel_count > 0);

vfloat4 average = partition_averages[partition];
Expand All @@ -590,7 +590,7 @@ void compute_avgs_and_dirs_3_comp_rgb(
vfloat4 sum_yp = vfloat4::zero();
vfloat4 sum_zp = vfloat4::zero();

for (unsigned int i = 0; i < texel_count; i++)
for (size_t i = 0; i < texel_count; i++)
{
unsigned int iwt = texel_indexes[i];

Expand Down Expand Up @@ -664,20 +664,20 @@ void compute_avgs_and_dirs_2_comp(
data_vg = blk.data_b;
}

unsigned int partition_count = pt.partition_count;
size_t partition_count = pt.partition_count;
promise(partition_count > 0);

for (unsigned int partition = 0; partition < partition_count; partition++)
for (size_t partition = 0; partition < partition_count; partition++)
{
const uint8_t *texel_indexes = pt.texels_of_partition[partition];
unsigned int texel_count = pt.partition_texel_count[partition];
size_t texel_count = pt.partition_texel_count[partition];
promise(texel_count > 0);

// Only compute a partition mean if more than one partition
if (partition_count > 1)
{
average = vfloat4::zero();
for (unsigned int i = 0; i < texel_count; i++)
for (size_t i = 0; i < texel_count; i++)
{
unsigned int iwt = texel_indexes[i];
average += vfloat2(data_vr[iwt], data_vg[iwt]);
Expand All @@ -691,7 +691,7 @@ void compute_avgs_and_dirs_2_comp(
vfloat4 sum_xp = vfloat4::zero();
vfloat4 sum_yp = vfloat4::zero();

for (unsigned int i = 0; i < texel_count; i++)
for (size_t i = 0; i < texel_count; i++)
{
unsigned int iwt = texel_indexes[i];
vfloat4 texel_datum = vfloat2(data_vr[iwt], data_vg[iwt]);
Expand Down Expand Up @@ -729,20 +729,20 @@ void compute_error_squared_rgba(
float& uncor_error,
float& samec_error
) {
unsigned int partition_count = pi.partition_count;
size_t partition_count = pi.partition_count;
promise(partition_count > 0);

vfloatacc uncor_errorsumv = vfloatacc::zero();
vfloatacc samec_errorsumv = vfloatacc::zero();

for (unsigned int partition = 0; partition < partition_count; partition++)
for (size_t partition = 0; partition < partition_count; partition++)
{
const uint8_t *texel_indexes = pi.texels_of_partition[partition];

processed_line4 l_uncor = uncor_plines[partition];
processed_line4 l_samec = samec_plines[partition];

unsigned int texel_count = pi.partition_texel_count[partition];
size_t texel_count = pi.partition_texel_count[partition];
promise(texel_count > 0);

// Vectorize some useful scalar inputs
Expand Down Expand Up @@ -775,7 +775,7 @@ void compute_error_squared_rgba(
// array to extend the last value. This means min/max are not impacted, but we need to mask
// out the dummy values when we compute the line weighting.
vint lane_ids = vint::lane_id();
for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH)
for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH)
{
vmask mask = lane_ids < vint(texel_count);
const uint8_t* texel_idxs = texel_indexes + i;
Expand Down Expand Up @@ -847,17 +847,17 @@ void compute_error_squared_rgb(
float& uncor_error,
float& samec_error
) {
unsigned int partition_count = pi.partition_count;
size_t partition_count = pi.partition_count;
promise(partition_count > 0);

vfloatacc uncor_errorsumv = vfloatacc::zero();
vfloatacc samec_errorsumv = vfloatacc::zero();

for (unsigned int partition = 0; partition < partition_count; partition++)
for (size_t partition = 0; partition < partition_count; partition++)
{
partition_lines3& pl = plines[partition];
const uint8_t *texel_indexes = pi.texels_of_partition[partition];
unsigned int texel_count = pi.partition_texel_count[partition];
size_t texel_count = pi.partition_texel_count[partition];
promise(texel_count > 0);

processed_line3 l_uncor = pl.uncor_pline;
Expand Down Expand Up @@ -889,7 +889,7 @@ void compute_error_squared_rgb(
// to extend the last value. This means min/max are not impacted, but we need to mask
// out the dummy values when we compute the line weighting.
vint lane_ids = vint::lane_id();
for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH)
for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH)
{
vmask mask = lane_ids < vint(texel_count);
const uint8_t* texel_idxs = texel_indexes + i;
Expand Down
4 changes: 2 additions & 2 deletions Source/astcenc_find_best_partitioning.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2011-2024 Arm Limited
// Copyright 2011-2025 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
Expand Down Expand Up @@ -226,7 +226,7 @@ static void kmeans_update(

uint8_t partition_texel_count[BLOCK_MAX_PARTITIONS] { 0 };

// Find the center-of-gravity in each cluster
// Find the center of gravity in each cluster
for (unsigned int i = 0; i < texel_count; i++)
{
uint8_t partition = partition_of_texel[i];
Expand Down
2 changes: 1 addition & 1 deletion Source/astcenc_ideal_endpoints_and_weights.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -867,7 +867,6 @@ void compute_ideal_weights_for_decimation(
}

// Otherwise compute an estimate and perform single refinement iteration
ASTCENC_ALIGNAS float infilled_weights[BLOCK_MAX_TEXELS];

// Compute an initial average for each decimated weight
bool constant_wes = ei.is_constant_weight_error_scale;
Expand Down Expand Up @@ -908,6 +907,7 @@ void compute_ideal_weights_for_decimation(
// Populate the interpolated weight grid based on the initial average
// Process SIMD-width texel coordinates at a time while we can. Safe to
// over-process full SIMD vectors - the tail is zeroed.
ASTCENC_ALIGNAS float infilled_weights[BLOCK_MAX_TEXELS];
if (di.max_texel_weight_count <= 2)
{
for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH)
Expand Down