diff --git a/Source/astcenc.h b/Source/astcenc.h index 8ecdc16f..da96cdeb 100644 --- a/Source/astcenc.h +++ b/Source/astcenc.h @@ -317,7 +317,7 @@ extern "C" typedef void (*astcenc_progress_callback)(float); * be e.g. rrrg (the default ordering for ASTC normals on the command line) or gggr (the ordering * used by BC5n). */ -static const unsigned int ASTCENC_FLG_MAP_NORMAL = 1 << 0; +static const size_t ASTCENC_FLG_MAP_NORMAL = 1 << 0; /** * @brief Enable compression heuristics that assume use of decode_unorm8 decode mode. @@ -330,7 +330,7 @@ static const unsigned int ASTCENC_FLG_MAP_NORMAL = 1 << 0; * Note that LDR_SRGB images will always use decode_unorm8 for the RGB channels, irrespective of * this setting. */ -static const unsigned int ASTCENC_FLG_USE_DECODE_UNORM8 = 1 << 1; +static const size_t ASTCENC_FLG_USE_DECODE_UNORM8 = 1 << 1; /** * @brief Enable alpha weighting. @@ -339,7 +339,7 @@ static const unsigned int ASTCENC_FLG_USE_DECODE_UNORM8 = 1 << 1; * the transparency level. This allows the codec to more accurately encode the alpha value in areas * where the color value is less significant. */ -static const unsigned int ASTCENC_FLG_USE_ALPHA_WEIGHT = 1 << 2; +static const size_t ASTCENC_FLG_USE_ALPHA_WEIGHT = 1 << 2; /** * @brief Enable perceptual error metrics. @@ -347,7 +347,7 @@ static const unsigned int ASTCENC_FLG_USE_ALPHA_WEIGHT = 1 << 2; * This mode enables perceptual compression mode, which will optimize for perceptual error rather * than best PSNR. Only some input modes support perceptual error metrics. */ -static const unsigned int ASTCENC_FLG_USE_PERCEPTUAL = 1 << 3; +static const size_t ASTCENC_FLG_USE_PERCEPTUAL = 1 << 3; /** * @brief Create a decompression-only context. @@ -355,7 +355,7 @@ static const unsigned int ASTCENC_FLG_USE_PERCEPTUAL = 1 << 3; * This mode disables support for compression. This enables context allocation to skip some * transient buffer allocation, resulting in lower memory usage. */ -static const unsigned int ASTCENC_FLG_DECOMPRESS_ONLY = 1 << 4; +static const size_t ASTCENC_FLG_DECOMPRESS_ONLY = 1 << 4; /** * @brief Create a self-decompression context. @@ -365,7 +365,7 @@ static const unsigned int ASTCENC_FLG_DECOMPRESS_ONLY = 1 << 4; * cases, and setting this flag enables additional optimizations, but does mean that the context * cannot reliably decompress arbitrary ASTC images. */ -static const unsigned int ASTCENC_FLG_SELF_DECOMPRESS_ONLY = 1 << 5; +static const size_t ASTCENC_FLG_SELF_DECOMPRESS_ONLY = 1 << 5; /** * @brief Enable RGBM map compression. @@ -388,12 +388,12 @@ static const unsigned int ASTCENC_FLG_SELF_DECOMPRESS_ONLY = 1 << 5; * scale, ensuring that the M value is accurately encoded. This defaults to 10 when in RGBM mode, * matching the default scale factor. */ -static const unsigned int ASTCENC_FLG_MAP_RGBM = 1 << 6; +static const size_t ASTCENC_FLG_MAP_RGBM = 1 << 6; /** * @brief The bit mask of all valid flags. */ -static const unsigned int ASTCENC_ALL_FLAGS = +static const size_t ASTCENC_ALL_FLAGS = ASTCENC_FLG_MAP_NORMAL | ASTCENC_FLG_MAP_RGBM | ASTCENC_FLG_USE_ALPHA_WEIGHT | @@ -418,16 +418,16 @@ struct astcenc_config astcenc_profile profile; /** @brief The set of set flags. */ - unsigned int flags; + size_t flags; /** @brief The ASTC block size X dimension. */ - unsigned int block_x; + size_t block_x; /** @brief The ASTC block size Y dimension. */ - unsigned int block_y; + size_t block_y; /** @brief The ASTC block size Z dimension. 
*/ - unsigned int block_z; + size_t block_z; /** @brief The red component weight scale for error weighting (-cw). */ float cw_r_weight; @@ -448,7 +448,7 @@ struct astcenc_config * will be sampled using linear texture filtering to minimize color bleed out of transparent * texels that are adjacent to non-transparent texels. */ - unsigned int a_scale_radius; + size_t a_scale_radius; /** @brief The RGBM scale factor for the shared multiplier (-rgbm). */ float rgbm_m_scale; @@ -458,35 +458,35 @@ struct astcenc_config * * Valid values are between 1 and 4. */ - unsigned int tune_partition_count_limit; + size_t tune_partition_count_limit; /** * @brief The maximum number of partitions searched (-2partitionindexlimit). * * Valid values are between 1 and 1024. */ - unsigned int tune_2partition_index_limit; + size_t tune_2partition_index_limit; /** * @brief The maximum number of partitions searched (-3partitionindexlimit). * * Valid values are between 1 and 1024. */ - unsigned int tune_3partition_index_limit; + size_t tune_3partition_index_limit; /** * @brief The maximum number of partitions searched (-4partitionindexlimit). * * Valid values are between 1 and 1024. */ - unsigned int tune_4partition_index_limit; + size_t tune_4partition_index_limit; /** * @brief The maximum centile for block modes searched (-blockmodelimit). * * Valid values are between 1 and 100. */ - unsigned int tune_block_mode_limit; + size_t tune_block_mode_limit; /** * @brief The maximum iterative refinements applied (-refinementlimit). @@ -494,35 +494,35 @@ struct astcenc_config * Valid values are between 1 and N; there is no technical upper limit * but little benefit is expected after N=4. */ - unsigned int tune_refinement_limit; + size_t tune_refinement_limit; /** * @brief The number of trial candidates per mode search (-candidatelimit). * * Valid values are between 1 and TUNE_MAX_TRIAL_CANDIDATES. */ - unsigned int tune_candidate_limit; + size_t tune_candidate_limit; /** * @brief The number of trial partitionings per search (-2partitioncandidatelimit). * * Valid values are between 1 and TUNE_MAX_PARTITIONING_CANDIDATES. */ - unsigned int tune_2partitioning_candidate_limit; + size_t tune_2partitioning_candidate_limit; /** * @brief The number of trial partitionings per search (-3partitioncandidatelimit). * * Valid values are between 1 and TUNE_MAX_PARTITIONING_CANDIDATES. */ - unsigned int tune_3partitioning_candidate_limit; + size_t tune_3partitioning_candidate_limit; /** * @brief The number of trial partitionings per search (-4partitioncandidatelimit). * * Valid values are between 1 and TUNE_MAX_PARTITIONING_CANDIDATES. */ - unsigned int tune_4partitioning_candidate_limit; + size_t tune_4partitioning_candidate_limit; /** * @brief The dB threshold for stopping block search (-dblimit). @@ -601,13 +601,13 @@ struct astcenc_config struct astcenc_image { /** @brief The X dimension of the image, in texels. */ - unsigned int dim_x; + size_t dim_x; /** @brief The Y dimension of the image, in texels. */ - unsigned int dim_y; + size_t dim_y; /** @brief The Z dimension of the image, in texels. */ - unsigned int dim_z; + size_t dim_z; /** @brief The data type per component. */ astcenc_type data_type; @@ -628,16 +628,16 @@ struct astcenc_block_info astcenc_profile profile; /** @brief The number of texels in the X dimension. */ - unsigned int block_x; + size_t block_x; /** @brief The number of texels in the Y dimension. */ - unsigned int block_y; + size_t block_y; /** @brief The number of texel in the Z dimension. 
*/ - unsigned int block_z; + size_t block_z; /** @brief The number of texels in the block. */ - unsigned int texel_count; + size_t texel_count; /** @brief True if this block is an error block. */ bool is_error_block; @@ -652,31 +652,31 @@ struct astcenc_block_info bool is_dual_plane_block; /** @brief The number of partitions if not constant color. */ - unsigned int partition_count; + size_t partition_count; /** @brief The partition index if 2 - 4 partitions used. */ - unsigned int partition_index; + size_t partition_index; /** @brief The component index of the second plane if dual plane. */ - unsigned int dual_plane_component; + size_t dual_plane_component; /** @brief The color endpoint encoding mode for each partition. */ - unsigned int color_endpoint_modes[4]; + size_t color_endpoint_modes[4]; /** @brief The number of color endpoint quantization levels. */ - unsigned int color_level_count; + size_t color_level_count; /** @brief The number of weight quantization levels. */ - unsigned int weight_level_count; + size_t weight_level_count; /** @brief The number of weights in the X dimension. */ - unsigned int weight_x; + size_t weight_x; /** @brief The number of weights in the Y dimension. */ - unsigned int weight_y; + size_t weight_y; /** @brief The number of weights in the Z dimension. */ - unsigned int weight_z; + size_t weight_z; /** @brief The unpacked color endpoints for each partition. */ float color_endpoints[4][2][4]; @@ -712,11 +712,11 @@ struct astcenc_block_info */ ASTCENC_PUBLIC astcenc_error astcenc_config_init( astcenc_profile profile, - unsigned int block_x, - unsigned int block_y, - unsigned int block_z, + size_t block_x, + size_t block_y, + size_t block_z, float quality, - unsigned int flags, + size_t flags, astcenc_config* config); /** @@ -739,7 +739,7 @@ ASTCENC_PUBLIC astcenc_error astcenc_config_init( */ ASTCENC_PUBLIC astcenc_error astcenc_context_alloc( const astcenc_config* config, - unsigned int thread_count, + size_t thread_count, astcenc_context** context); /** @@ -766,7 +766,7 @@ ASTCENC_PUBLIC astcenc_error astcenc_compress_image( const astcenc_swizzle* swizzle, uint8_t* data_out, size_t data_len, - unsigned int thread_index); + size_t thread_index); /** * @brief Reset the codec state for a new compression. @@ -816,7 +816,7 @@ ASTCENC_PUBLIC astcenc_error astcenc_decompress_image( size_t data_len, astcenc_image* image_out, const astcenc_swizzle* swizzle, - unsigned int thread_index); + size_t thread_index); /** * @brief Reset the codec state for a new decompression. 
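Note on the header change above: the public entry points now take size_t for block dimensions, flags, thread counts, and thread indices. The snippet below is a minimal caller sketch exercising the updated signatures only; it assumes the parts of the API not shown in this diff (ASTCENC_PRF_LDR, ASTCENC_PRE_MEDIUM, ASTCENC_TYPE_U8, the ASTCENC_SWZ_* values, ASTCENC_SUCCESS, and astcenc_context_free) keep their existing definitions.

// Sketch only: drives the size_t-based signatures from this header change.
// Enums and astcenc_context_free() are assumed unchanged from the current API.
#include <cstddef>
#include <cstdint>
#include <vector>
#include "astcenc.h"

bool compress_ldr_6x6(uint8_t* rgba8, size_t dim_x, size_t dim_y, std::vector<uint8_t>& out)
{
	static const size_t block_x = 6;
	static const size_t block_y = 6;

	astcenc_config config;
	astcenc_error status = astcenc_config_init(
	    ASTCENC_PRF_LDR, block_x, block_y, 1, ASTCENC_PRE_MEDIUM,
	    ASTCENC_FLG_USE_ALPHA_WEIGHT, &config);
	if (status != ASTCENC_SUCCESS)
	{
		return false;
	}

	astcenc_context* context = nullptr;
	status = astcenc_context_alloc(&config, 1, &context);
	if (status != ASTCENC_SUCCESS)
	{
		return false;
	}

	// One 2D slice of 8-bit RGBA data; ASTC always emits 16 bytes per block
	astcenc_image image;
	image.dim_x = dim_x;
	image.dim_y = dim_y;
	image.dim_z = 1;
	image.data_type = ASTCENC_TYPE_U8;
	void* slices[1] = { rgba8 };
	image.data = slices;

	size_t blocks_x = (dim_x + block_x - 1) / block_x;
	size_t blocks_y = (dim_y + block_y - 1) / block_y;
	out.resize(blocks_x * blocks_y * 16);

	const astcenc_swizzle swizzle { ASTCENC_SWZ_R, ASTCENC_SWZ_G, ASTCENC_SWZ_B, ASTCENC_SWZ_A };

	// Single-threaded compression, so the (now size_t) thread_index is 0
	status = astcenc_compress_image(context, &image, &swizzle, out.data(), out.size(), 0);

	astcenc_context_free(context);
	return status == ASTCENC_SUCCESS;
}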
diff --git a/Source/astcenc_averages_and_directions.cpp b/Source/astcenc_averages_and_directions.cpp index 8e2f8d8c..3864ece2 100644 --- a/Source/astcenc_averages_and_directions.cpp +++ b/Source/astcenc_averages_and_directions.cpp @@ -49,8 +49,8 @@ static void compute_partition_averages_rgb( const image_block& blk, vfloat4 averages[BLOCK_MAX_PARTITIONS] ) { - unsigned int partition_count = pi.partition_count; - unsigned int texel_count = blk.texel_count; + size_t partition_count = pi.partition_count; + size_t texel_count = blk.texel_count; promise(texel_count > 0); // For 1 partition just use the precomputed mean @@ -64,7 +64,7 @@ static void compute_partition_averages_rgb( vfloatacc pp_avg_rgb[3] {}; vint lane_id = vint::lane_id(); - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { vint texel_partition(pi.partition_of_texel + i); @@ -100,7 +100,7 @@ static void compute_partition_averages_rgb( vfloatacc pp_avg_rgb[2][3] {}; vint lane_id = vint::lane_id(); - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { vint texel_partition(pi.partition_of_texel + i); @@ -145,7 +145,7 @@ static void compute_partition_averages_rgb( vfloatacc pp_avg_rgb[3][3] {}; vint lane_id = vint::lane_id(); - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { vint texel_partition(pi.partition_of_texel + i); @@ -220,8 +220,8 @@ static void compute_partition_averages_rgba( const image_block& blk, vfloat4 averages[BLOCK_MAX_PARTITIONS] ) { - unsigned int partition_count = pi.partition_count; - unsigned int texel_count = blk.texel_count; + size_t partition_count = pi.partition_count; + size_t texel_count = blk.texel_count; promise(texel_count > 0); // For 1 partition just use the precomputed mean @@ -235,7 +235,7 @@ static void compute_partition_averages_rgba( vfloat4 pp_avg_rgba[4] {}; vint lane_id = vint::lane_id(); - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { vint texel_partition(pi.partition_of_texel + i); @@ -275,7 +275,7 @@ static void compute_partition_averages_rgba( vfloat4 pp_avg_rgba[2][4] {}; vint lane_id = vint::lane_id(); - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { vint texel_partition(pi.partition_of_texel + i); @@ -326,7 +326,7 @@ static void compute_partition_averages_rgba( vfloat4 pp_avg_rgba[3][4] {}; vint lane_id = vint::lane_id(); - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { vint texel_partition(pi.partition_of_texel + i); @@ -400,7 +400,7 @@ void compute_avgs_and_dirs_4_comp( for (int partition = 0; partition < partition_count; partition++) { const uint8_t *texel_indexes = pi.texels_of_partition[partition]; - unsigned int texel_count = pi.partition_texel_count[partition]; + size_t texel_count = pi.partition_texel_count[partition]; promise(texel_count > 0); vfloat4 average = partition_averages[partition]; @@ -411,9 +411,9 @@ void compute_avgs_and_dirs_4_comp( vfloat4 sum_zp = vfloat4::zero(); vfloat4 sum_wp = vfloat4::zero(); - for (unsigned int i = 0; i < texel_count; i++) + for (size_t i = 0; i < texel_count; i++) { - unsigned int iwt = texel_indexes[i]; + size_t iwt = texel_indexes[i]; vfloat4 
texel_datum = blk.texel(iwt); texel_datum = texel_datum - average; @@ -459,7 +459,7 @@ void compute_avgs_and_dirs_4_comp( void compute_avgs_and_dirs_3_comp( const partition_info& pi, const image_block& blk, - unsigned int omitted_component, + size_t omitted_component, partition_metrics pm[BLOCK_MAX_PARTITIONS] ) { // Pre-compute partition_averages @@ -509,13 +509,13 @@ void compute_avgs_and_dirs_3_comp( partition_averages[3] = partition_averages[3].swz<0, 1, 2>(); } - unsigned int partition_count = pi.partition_count; + size_t partition_count = pi.partition_count; promise(partition_count > 0); - for (unsigned int partition = 0; partition < partition_count; partition++) + for (size_t partition = 0; partition < partition_count; partition++) { const uint8_t *texel_indexes = pi.texels_of_partition[partition]; - unsigned int texel_count = pi.partition_texel_count[partition]; + size_t texel_count = pi.partition_texel_count[partition]; promise(texel_count > 0); vfloat4 average = partition_averages[partition]; @@ -525,9 +525,9 @@ void compute_avgs_and_dirs_3_comp( vfloat4 sum_yp = vfloat4::zero(); vfloat4 sum_zp = vfloat4::zero(); - for (unsigned int i = 0; i < texel_count; i++) + for (size_t i = 0; i < texel_count; i++) { - unsigned int iwt = texel_indexes[i]; + size_t iwt = texel_indexes[i]; vfloat4 texel_datum = vfloat3(data_vr[iwt], data_vg[iwt], @@ -570,17 +570,17 @@ void compute_avgs_and_dirs_3_comp_rgb( const image_block& blk, partition_metrics pm[BLOCK_MAX_PARTITIONS] ) { - unsigned int partition_count = pi.partition_count; + size_t partition_count = pi.partition_count; promise(partition_count > 0); // Pre-compute partition_averages vfloat4 partition_averages[BLOCK_MAX_PARTITIONS]; compute_partition_averages_rgb(pi, blk, partition_averages); - for (unsigned int partition = 0; partition < partition_count; partition++) + for (size_t partition = 0; partition < partition_count; partition++) { const uint8_t *texel_indexes = pi.texels_of_partition[partition]; - unsigned int texel_count = pi.partition_texel_count[partition]; + size_t texel_count = pi.partition_texel_count[partition]; promise(texel_count > 0); vfloat4 average = partition_averages[partition]; @@ -590,9 +590,9 @@ void compute_avgs_and_dirs_3_comp_rgb( vfloat4 sum_yp = vfloat4::zero(); vfloat4 sum_zp = vfloat4::zero(); - for (unsigned int i = 0; i < texel_count; i++) + for (size_t i = 0; i < texel_count; i++) { - unsigned int iwt = texel_indexes[i]; + size_t iwt = texel_indexes[i]; vfloat4 texel_datum = blk.texel3(iwt); texel_datum = texel_datum - average; @@ -631,8 +631,8 @@ void compute_avgs_and_dirs_3_comp_rgb( void compute_avgs_and_dirs_2_comp( const partition_info& pt, const image_block& blk, - unsigned int component1, - unsigned int component2, + size_t component1, + size_t component2, partition_metrics pm[BLOCK_MAX_PARTITIONS] ) { vfloat4 average; @@ -664,22 +664,22 @@ void compute_avgs_and_dirs_2_comp( data_vg = blk.data_b; } - unsigned int partition_count = pt.partition_count; + size_t partition_count = pt.partition_count; promise(partition_count > 0); - for (unsigned int partition = 0; partition < partition_count; partition++) + for (size_t partition = 0; partition < partition_count; partition++) { const uint8_t *texel_indexes = pt.texels_of_partition[partition]; - unsigned int texel_count = pt.partition_texel_count[partition]; + size_t texel_count = pt.partition_texel_count[partition]; promise(texel_count > 0); // Only compute a partition mean if more than one partition if (partition_count > 1) { average = vfloat4::zero(); 
- for (unsigned int i = 0; i < texel_count; i++) + for (size_t i = 0; i < texel_count; i++) { - unsigned int iwt = texel_indexes[i]; + size_t iwt = texel_indexes[i]; average += vfloat2(data_vr[iwt], data_vg[iwt]); } @@ -691,9 +691,9 @@ void compute_avgs_and_dirs_2_comp( vfloat4 sum_xp = vfloat4::zero(); vfloat4 sum_yp = vfloat4::zero(); - for (unsigned int i = 0; i < texel_count; i++) + for (size_t i = 0; i < texel_count; i++) { - unsigned int iwt = texel_indexes[i]; + size_t iwt = texel_indexes[i]; vfloat4 texel_datum = vfloat2(data_vr[iwt], data_vg[iwt]); texel_datum = texel_datum - average; @@ -729,20 +729,20 @@ void compute_error_squared_rgba( float& uncor_error, float& samec_error ) { - unsigned int partition_count = pi.partition_count; + size_t partition_count = pi.partition_count; promise(partition_count > 0); vfloatacc uncor_errorsumv = vfloatacc::zero(); vfloatacc samec_errorsumv = vfloatacc::zero(); - for (unsigned int partition = 0; partition < partition_count; partition++) + for (size_t partition = 0; partition < partition_count; partition++) { const uint8_t *texel_indexes = pi.texels_of_partition[partition]; processed_line4 l_uncor = uncor_plines[partition]; processed_line4 l_samec = samec_plines[partition]; - unsigned int texel_count = pi.partition_texel_count[partition]; + size_t texel_count = pi.partition_texel_count[partition]; promise(texel_count > 0); // Vectorize some useful scalar inputs @@ -775,7 +775,7 @@ void compute_error_squared_rgba( // array to extend the last value. This means min/max are not impacted, but we need to mask // out the dummy values when we compute the line weighting. vint lane_ids = vint::lane_id(); - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { vmask mask = lane_ids < vint(texel_count); const uint8_t* texel_idxs = texel_indexes + i; @@ -847,17 +847,17 @@ void compute_error_squared_rgb( float& uncor_error, float& samec_error ) { - unsigned int partition_count = pi.partition_count; + size_t partition_count = pi.partition_count; promise(partition_count > 0); vfloatacc uncor_errorsumv = vfloatacc::zero(); vfloatacc samec_errorsumv = vfloatacc::zero(); - for (unsigned int partition = 0; partition < partition_count; partition++) + for (size_t partition = 0; partition < partition_count; partition++) { partition_lines3& pl = plines[partition]; const uint8_t *texel_indexes = pi.texels_of_partition[partition]; - unsigned int texel_count = pi.partition_texel_count[partition]; + size_t texel_count = pi.partition_texel_count[partition]; promise(texel_count > 0); processed_line3 l_uncor = pl.uncor_pline; @@ -889,7 +889,7 @@ void compute_error_squared_rgb( // to extend the last value. This means min/max are not impacted, but we need to mask // out the dummy values when we compute the line weighting. vint lane_ids = vint::lane_id(); - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { vmask mask = lane_ids < vint(texel_count); const uint8_t* texel_idxs = texel_indexes + i; diff --git a/Source/astcenc_block_sizes.cpp b/Source/astcenc_block_sizes.cpp index 17e51dfc..f91b394c 100644 --- a/Source/astcenc_block_sizes.cpp +++ b/Source/astcenc_block_sizes.cpp @@ -34,17 +34,17 @@ * @return Returns true if a valid mode, false otherwise. 
*/ static bool decode_block_mode_2d( - unsigned int block_mode, - unsigned int& x_weights, - unsigned int& y_weights, + size_t block_mode, + size_t& x_weights, + size_t& y_weights, bool& is_dual_plane, - unsigned int& quant_mode, - unsigned int& weight_bits + size_t& quant_mode, + size_t& weight_bits ) { - unsigned int base_quant_mode = (block_mode >> 4) & 1; - unsigned int H = (block_mode >> 9) & 1; - unsigned int D = (block_mode >> 10) & 1; - unsigned int A = (block_mode >> 5) & 0x3; + size_t base_quant_mode = (block_mode >> 4) & 1; + size_t H = (block_mode >> 9) & 1; + size_t D = (block_mode >> 10) & 1; + size_t A = (block_mode >> 5) & 0x3; x_weights = 0; y_weights = 0; @@ -52,7 +52,7 @@ static bool decode_block_mode_2d( if ((block_mode & 3) != 0) { base_quant_mode |= (block_mode & 3) << 1; - unsigned int B = (block_mode >> 7) & 3; + size_t B = (block_mode >> 7) & 3; switch ((block_mode >> 2) & 3) { case 0: @@ -90,7 +90,7 @@ static bool decode_block_mode_2d( return false; } - unsigned int B = (block_mode >> 9) & 3; + size_t B = (block_mode >> 9) & 3; switch ((block_mode >> 7) & 3) { case 0: @@ -126,7 +126,7 @@ static bool decode_block_mode_2d( } } - unsigned int weight_count = x_weights * y_weights * (D + 1); + size_t weight_count = x_weights * y_weights * (D + 1); quant_mode = (base_quant_mode - 2) + 6 * H; is_dual_plane = D != 0; @@ -150,18 +150,18 @@ static bool decode_block_mode_2d( * @return Returns true if a valid mode, false otherwise. */ static bool decode_block_mode_3d( - unsigned int block_mode, - unsigned int& x_weights, - unsigned int& y_weights, - unsigned int& z_weights, + size_t block_mode, + size_t& x_weights, + size_t& y_weights, + size_t& z_weights, bool& is_dual_plane, - unsigned int& quant_mode, - unsigned int& weight_bits + size_t& quant_mode, + size_t& weight_bits ) { - unsigned int base_quant_mode = (block_mode >> 4) & 1; - unsigned int H = (block_mode >> 9) & 1; - unsigned int D = (block_mode >> 10) & 1; - unsigned int A = (block_mode >> 5) & 0x3; + size_t base_quant_mode = (block_mode >> 4) & 1; + size_t H = (block_mode >> 9) & 1; + size_t D = (block_mode >> 10) & 1; + size_t A = (block_mode >> 5) & 0x3; x_weights = 0; y_weights = 0; @@ -170,8 +170,8 @@ static bool decode_block_mode_3d( if ((block_mode & 3) != 0) { base_quant_mode |= (block_mode & 3) << 1; - unsigned int B = (block_mode >> 7) & 3; - unsigned int C = (block_mode >> 2) & 0x3; + size_t B = (block_mode >> 7) & 3; + size_t C = (block_mode >> 2) & 0x3; x_weights = A + 2; y_weights = B + 2; z_weights = C + 2; @@ -229,7 +229,7 @@ static bool decode_block_mode_3d( } } - unsigned int weight_count = x_weights * y_weights * z_weights * (D + 1); + size_t weight_count = x_weights * y_weights * z_weights * (D + 1); quant_mode = (base_quant_mode - 2) + 6 * H; is_dual_plane = D != 0; @@ -250,15 +250,15 @@ static bool decode_block_mode_3d( * @param[out] wb The decimation table init scratch working buffers. 
*/ static void init_decimation_info_2d( - unsigned int x_texels, - unsigned int y_texels, - unsigned int x_weights, - unsigned int y_weights, + size_t x_texels, + size_t y_texels, + size_t x_weights, + size_t y_weights, decimation_info& di, dt_init_working_buffers& wb ) { - unsigned int texels_per_block = x_texels * y_texels; - unsigned int weights_per_block = x_weights * y_weights; + size_t texels_per_block = x_texels * y_texels; + size_t weights_per_block = x_weights * y_weights; uint8_t max_texel_count_of_weight = 0; @@ -267,46 +267,46 @@ static void init_decimation_info_2d( promise(x_texels > 0); promise(y_texels > 0); - for (unsigned int i = 0; i < weights_per_block; i++) + for (size_t i = 0; i < weights_per_block; i++) { wb.texel_count_of_weight[i] = 0; } - for (unsigned int i = 0; i < texels_per_block; i++) + for (size_t i = 0; i < texels_per_block; i++) { wb.weight_count_of_texel[i] = 0; } - for (unsigned int y = 0; y < y_texels; y++) + for (size_t y = 0; y < y_texels; y++) { - for (unsigned int x = 0; x < x_texels; x++) + for (size_t x = 0; x < x_texels; x++) { - unsigned int texel = y * x_texels + x; + size_t texel = y * x_texels + x; - unsigned int x_weight = (((1024 + x_texels / 2) / (x_texels - 1)) * x * (x_weights - 1) + 32) >> 6; - unsigned int y_weight = (((1024 + y_texels / 2) / (y_texels - 1)) * y * (y_weights - 1) + 32) >> 6; + size_t x_weight = (((1024 + x_texels / 2) / (x_texels - 1)) * x * (x_weights - 1) + 32) >> 6; + size_t y_weight = (((1024 + y_texels / 2) / (y_texels - 1)) * y * (y_weights - 1) + 32) >> 6; - unsigned int x_weight_frac = x_weight & 0xF; - unsigned int y_weight_frac = y_weight & 0xF; - unsigned int x_weight_int = x_weight >> 4; - unsigned int y_weight_int = y_weight >> 4; + size_t x_weight_frac = x_weight & 0xF; + size_t y_weight_frac = y_weight & 0xF; + size_t x_weight_int = x_weight >> 4; + size_t y_weight_int = y_weight >> 4; - unsigned int qweight[4]; + size_t qweight[4]; qweight[0] = x_weight_int + y_weight_int * x_weights; qweight[1] = qweight[0] + 1; qweight[2] = qweight[0] + x_weights; qweight[3] = qweight[2] + 1; // Truncated-precision bilinear interpolation - unsigned int prod = x_weight_frac * y_weight_frac; + size_t prod = x_weight_frac * y_weight_frac; - unsigned int weight[4]; + size_t weight[4]; weight[3] = (prod + 8) >> 4; weight[1] = x_weight_frac - weight[3]; weight[2] = y_weight_frac - weight[3]; weight[0] = 16 - x_weight_frac - y_weight_frac + weight[3]; - for (unsigned int i = 0; i < 4; i++) + for (size_t i = 0; i < 4; i++) { if (weight[i] != 0) { @@ -323,12 +323,12 @@ static void init_decimation_info_2d( } uint8_t max_texel_weight_count = 0; - for (unsigned int i = 0; i < texels_per_block; i++) + for (size_t i = 0; i < texels_per_block; i++) { di.texel_weight_count[i] = wb.weight_count_of_texel[i]; max_texel_weight_count = astc::max(max_texel_weight_count, di.texel_weight_count[i]); - for (unsigned int j = 0; j < wb.weight_count_of_texel[i]; j++) + for (size_t j = 0; j < wb.weight_count_of_texel[i]; j++) { di.texel_weight_contribs_int_tr[j][i] = wb.weights_of_texel[i][j]; di.texel_weight_contribs_float_tr[j][i] = static_cast(wb.weights_of_texel[i][j]) * (1.0f / WEIGHTS_TEXEL_SUM); @@ -336,7 +336,7 @@ static void init_decimation_info_2d( } // Init all 4 entries so we can rely on zeros for vectorization - for (unsigned int j = wb.weight_count_of_texel[i]; j < 4; j++) + for (size_t j = wb.weight_count_of_texel[i]; j < 4; j++) { di.texel_weight_contribs_int_tr[j][i] = 0; di.texel_weight_contribs_float_tr[j][i] = 0.0f; @@ -346,12 
+346,12 @@ static void init_decimation_info_2d( di.max_texel_weight_count = max_texel_weight_count; - for (unsigned int i = 0; i < weights_per_block; i++) + for (size_t i = 0; i < weights_per_block; i++) { - unsigned int texel_count_wt = wb.texel_count_of_weight[i]; + size_t texel_count_wt = wb.texel_count_of_weight[i]; di.weight_texel_count[i] = static_cast(texel_count_wt); - for (unsigned int j = 0; j < texel_count_wt; j++) + for (size_t j = 0; j < texel_count_wt; j++) { uint8_t texel = wb.texels_of_weight[i][j]; @@ -361,7 +361,7 @@ static void init_decimation_info_2d( // Store the per-texel contribution of this weight for each texel it contributes to di.texel_contrib_for_weight[j][i] = 0.0f; - for (unsigned int k = 0; k < 4; k++) + for (size_t k = 0; k < 4; k++) { uint8_t dttw = di.texel_weights_tr[k][texel]; float dttwf = di.texel_weight_contribs_float_tr[k][texel]; @@ -376,7 +376,7 @@ static void init_decimation_info_2d( // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails // Match last texel in active lane in SIMD group, for better gathers uint8_t last_texel = di.weight_texels_tr[texel_count_wt - 1][i]; - for (unsigned int j = texel_count_wt; j < max_texel_count_of_weight; j++) + for (size_t j = texel_count_wt; j < max_texel_count_of_weight; j++) { di.weight_texels_tr[j][i] = last_texel; di.weights_texel_contribs_tr[j][i] = 0.0f; @@ -384,12 +384,12 @@ static void init_decimation_info_2d( } // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails - unsigned int texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block); - for (unsigned int i = texels_per_block; i < texels_per_block_simd; i++) + size_t texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block); + for (size_t i = texels_per_block; i < texels_per_block_simd; i++) { di.texel_weight_count[i] = 0; - for (unsigned int j = 0; j < 4; j++) + for (size_t j = 0; j < 4; j++) { di.texel_weight_contribs_float_tr[j][i] = 0; di.texel_weights_tr[j][i] = 0; @@ -399,15 +399,15 @@ static void init_decimation_info_2d( // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails // Match last texel in active lane in SIMD group, for better gathers - unsigned int last_texel_count_wt = wb.texel_count_of_weight[weights_per_block - 1]; + size_t last_texel_count_wt = wb.texel_count_of_weight[weights_per_block - 1]; uint8_t last_texel = di.weight_texels_tr[last_texel_count_wt - 1][weights_per_block - 1]; - unsigned int weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block); - for (unsigned int i = weights_per_block; i < weights_per_block_simd; i++) + size_t weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block); + for (size_t i = weights_per_block; i < weights_per_block_simd; i++) { di.weight_texel_count[i] = 0; - for (unsigned int j = 0; j < max_texel_count_of_weight; j++) + for (size_t j = 0; j < max_texel_count_of_weight; j++) { di.weight_texels_tr[j][i] = last_texel; di.weights_texel_contribs_tr[j][i] = 0.0f; @@ -434,38 +434,38 @@ static void init_decimation_info_2d( @param[out] wb The decimation table init scratch working buffers. 
*/ static void init_decimation_info_3d( - unsigned int x_texels, - unsigned int y_texels, - unsigned int z_texels, - unsigned int x_weights, - unsigned int y_weights, - unsigned int z_weights, + size_t x_texels, + size_t y_texels, + size_t z_texels, + size_t x_weights, + size_t y_weights, + size_t z_weights, decimation_info& di, dt_init_working_buffers& wb ) { - unsigned int texels_per_block = x_texels * y_texels * z_texels; - unsigned int weights_per_block = x_weights * y_weights * z_weights; + size_t texels_per_block = x_texels * y_texels * z_texels; + size_t weights_per_block = x_weights * y_weights * z_weights; uint8_t max_texel_count_of_weight = 0; promise(weights_per_block > 0); promise(texels_per_block > 0); - for (unsigned int i = 0; i < weights_per_block; i++) + for (size_t i = 0; i < weights_per_block; i++) { wb.texel_count_of_weight[i] = 0; } - for (unsigned int i = 0; i < texels_per_block; i++) + for (size_t i = 0; i < texels_per_block; i++) { wb.weight_count_of_texel[i] = 0; } - for (unsigned int z = 0; z < z_texels; z++) + for (size_t z = 0; z < z_texels; z++) { - for (unsigned int y = 0; y < y_texels; y++) + for (size_t y = 0; y < y_texels; y++) { - for (unsigned int x = 0; x < x_texels; x++) + for (size_t x = 0; x < x_texels; x++) { int texel = (z * y_texels + y) * x_texels + x; @@ -561,7 +561,7 @@ static void init_decimation_info_3d( weight[2] = w2; weight[3] = w3; - for (unsigned int i = 0; i < 4; i++) + for (size_t i = 0; i < 4; i++) { if (weight[i] != 0) { @@ -579,20 +579,20 @@ static void init_decimation_info_3d( } uint8_t max_texel_weight_count = 0; - for (unsigned int i = 0; i < texels_per_block; i++) + for (size_t i = 0; i < texels_per_block; i++) { di.texel_weight_count[i] = wb.weight_count_of_texel[i]; max_texel_weight_count = astc::max(max_texel_weight_count, di.texel_weight_count[i]); // Init all 4 entries so we can rely on zeros for vectorization - for (unsigned int j = 0; j < 4; j++) + for (size_t j = 0; j < 4; j++) { di.texel_weight_contribs_int_tr[j][i] = 0; di.texel_weight_contribs_float_tr[j][i] = 0.0f; di.texel_weights_tr[j][i] = 0; } - for (unsigned int j = 0; j < wb.weight_count_of_texel[i]; j++) + for (size_t j = 0; j < wb.weight_count_of_texel[i]; j++) { di.texel_weight_contribs_int_tr[j][i] = wb.weights_of_texel[i][j]; di.texel_weight_contribs_float_tr[j][i] = static_cast(wb.weights_of_texel[i][j]) * (1.0f / WEIGHTS_TEXEL_SUM); @@ -602,14 +602,14 @@ static void init_decimation_info_3d( di.max_texel_weight_count = max_texel_weight_count; - for (unsigned int i = 0; i < weights_per_block; i++) + for (size_t i = 0; i < weights_per_block; i++) { - unsigned int texel_count_wt = wb.texel_count_of_weight[i]; + size_t texel_count_wt = wb.texel_count_of_weight[i]; di.weight_texel_count[i] = static_cast(texel_count_wt); - for (unsigned int j = 0; j < texel_count_wt; j++) + for (size_t j = 0; j < texel_count_wt; j++) { - unsigned int texel = wb.texels_of_weight[i][j]; + size_t texel = wb.texels_of_weight[i][j]; // Create transposed versions of these for better vectorization di.weight_texels_tr[j][i] = static_cast(texel); @@ -617,7 +617,7 @@ static void init_decimation_info_3d( // Store the per-texel contribution of this weight for each texel it contributes to di.texel_contrib_for_weight[j][i] = 0.0f; - for (unsigned int k = 0; k < 4; k++) + for (size_t k = 0; k < 4; k++) { uint8_t dttw = di.texel_weights_tr[k][texel]; float dttwf = di.texel_weight_contribs_float_tr[k][texel]; @@ -632,7 +632,7 @@ static void init_decimation_info_3d( // Initialize array tail so we 
can over-fetch with SIMD later to avoid loop tails // Match last texel in active lane in SIMD group, for better gathers uint8_t last_texel = di.weight_texels_tr[texel_count_wt - 1][i]; - for (unsigned int j = texel_count_wt; j < max_texel_count_of_weight; j++) + for (size_t j = texel_count_wt; j < max_texel_count_of_weight; j++) { di.weight_texels_tr[j][i] = last_texel; di.weights_texel_contribs_tr[j][i] = 0.0f; @@ -640,12 +640,12 @@ static void init_decimation_info_3d( } // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails - unsigned int texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block); - for (unsigned int i = texels_per_block; i < texels_per_block_simd; i++) + size_t texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block); + for (size_t i = texels_per_block; i < texels_per_block_simd; i++) { di.texel_weight_count[i] = 0; - for (unsigned int j = 0; j < 4; j++) + for (size_t j = 0; j < 4; j++) { di.texel_weight_contribs_float_tr[j][i] = 0; di.texel_weights_tr[j][i] = 0; @@ -658,8 +658,8 @@ static void init_decimation_info_3d( int last_texel_count_wt = wb.texel_count_of_weight[weights_per_block - 1]; uint8_t last_texel = di.weight_texels_tr[last_texel_count_wt - 1][weights_per_block - 1]; - unsigned int weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block); - for (unsigned int i = weights_per_block; i < weights_per_block_simd; i++) + size_t weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block); + for (size_t i = weights_per_block; i < weights_per_block_simd; i++) { di.weight_texel_count[i] = 0; @@ -711,7 +711,7 @@ static void assign_kmeans_texels( } // Assign 64 random indices, retrying if we see repeats - unsigned int arr_elements_set = 0; + size_t arr_elements_set = 0; while (arr_elements_set < BLOCK_MAX_KMEANS_TEXELS) { uint8_t texel = static_cast(astc::rand(rng_state)); @@ -736,15 +736,15 @@ static void assign_kmeans_texels( * @param index The packed array index to populate. */ static void construct_dt_entry_2d( - unsigned int x_texels, - unsigned int y_texels, - unsigned int x_weights, - unsigned int y_weights, + size_t x_texels, + size_t y_texels, + size_t x_weights, + size_t y_weights, block_size_descriptor& bsd, dt_init_working_buffers& wb, - unsigned int index + size_t index ) { - unsigned int weight_count = x_weights * y_weights; + size_t weight_count = x_weights * y_weights; assert(weight_count <= BLOCK_MAX_WEIGHTS); bool try_2planes = (2 * weight_count) <= BLOCK_MAX_WEIGHTS; @@ -756,7 +756,7 @@ static void construct_dt_entry_2d( int maxprec_2planes = -1; for (int i = 0; i < 12; i++) { - unsigned int bits_1plane = get_ise_sequence_bitcount(weight_count, static_cast(i)); + size_t bits_1plane = get_ise_sequence_bitcount(weight_count, static_cast(i)); if (bits_1plane >= BLOCK_MIN_WEIGHT_BITS && bits_1plane <= BLOCK_MAX_WEIGHT_BITS) { maxprec_1plane = i; @@ -764,7 +764,7 @@ static void construct_dt_entry_2d( if (try_2planes) { - unsigned int bits_2planes = get_ise_sequence_bitcount(2 * weight_count, static_cast(i)); + size_t bits_2planes = get_ise_sequence_bitcount(2 * weight_count, static_cast(i)); if (bits_2planes >= BLOCK_MIN_WEIGHT_BITS && bits_2planes <= BLOCK_MAX_WEIGHT_BITS) { maxprec_2planes = i; @@ -790,15 +790,15 @@ static void construct_dt_entry_2d( * @param[out] bsd The block size descriptor to populate. 
*/ static void construct_block_size_descriptor_2d( - unsigned int x_texels, - unsigned int y_texels, + size_t x_texels, + size_t y_texels, bool can_omit_modes, float mode_cutoff, block_size_descriptor& bsd ) { // Store a remap table for storing packed decimation modes. // Indexing uses [Y * 16 + X] and max size for each axis is 12. - static const unsigned int MAX_DMI = 12 * 16 + 12; + static const size_t MAX_DMI = 12 * 16 + 12; int decimation_mode_index[MAX_DMI]; dt_init_working_buffers* wb = new dt_init_working_buffers; @@ -808,7 +808,7 @@ static void construct_block_size_descriptor_2d( bsd.zdim = 1; bsd.texel_count = static_cast(x_texels * y_texels); - for (unsigned int i = 0; i < MAX_DMI; i++) + for (size_t i = 0; i < MAX_DMI; i++) { decimation_mode_index[i] = -1; } @@ -824,15 +824,15 @@ static void construct_block_size_descriptor_2d( #endif // Construct the list of block formats referencing the decimation tables - unsigned int packed_bm_idx = 0; - unsigned int packed_dm_idx = 0; + size_t packed_bm_idx = 0; + size_t packed_dm_idx = 0; // Trackers - unsigned int bm_counts[4] { 0 }; - unsigned int dm_counts[4] { 0 }; + size_t bm_counts[4] { 0 }; + size_t dm_counts[4] { 0 }; // Clear the list to a known-bad value - for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++) + for (size_t i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++) { bsd.block_mode_packed_index[i] = BLOCK_BAD_BLOCK_MODE; } @@ -842,10 +842,10 @@ static void construct_block_size_descriptor_2d( // - Pass 1 - keep selected single plane "non-always" block modes // - Pass 2 - keep select dual plane block modes // - Pass 3 - keep everything else that's legal - unsigned int limit = can_omit_modes ? 3 : 4; - for (unsigned int j = 0; j < limit; j ++) + size_t limit = can_omit_modes ? 3 : 4; + for (size_t j = 0; j < limit; j ++) { - for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++) + for (size_t i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++) { // Skip modes we've already included in a previous pass if (bsd.block_mode_packed_index[i] != BLOCK_BAD_BLOCK_MODE) @@ -854,11 +854,11 @@ static void construct_block_size_descriptor_2d( } // Decode parameters - unsigned int x_weights; - unsigned int y_weights; + size_t x_weights; + size_t y_weights; bool is_dual_plane; - unsigned int quant_mode; - unsigned int weight_bits; + size_t quant_mode; + size_t weight_bits; bool valid = decode_block_mode_2d(i, x_weights, y_weights, is_dual_plane, quant_mode, weight_bits); // Always skip invalid encodings for the current block size @@ -965,7 +965,7 @@ static void construct_block_size_descriptor_2d( #endif // Ensure the end of the array contains valid data (should never get read) - for (unsigned int i = bsd.decimation_mode_count_all; i < WEIGHTS_MAX_DECIMATION_MODES; i++) + for (size_t i = bsd.decimation_mode_count_all; i < WEIGHTS_MAX_DECIMATION_MODES; i++) { bsd.decimation_modes[i].maxprec_1plane = -1; bsd.decimation_modes[i].maxprec_2planes = -1; @@ -991,16 +991,16 @@ static void construct_block_size_descriptor_2d( * @param[out] bsd The block size descriptor to populate. */ static void construct_block_size_descriptor_3d( - unsigned int x_texels, - unsigned int y_texels, - unsigned int z_texels, + size_t x_texels, + size_t y_texels, + size_t z_texels, block_size_descriptor& bsd ) { // Store a remap table for storing packed decimation modes. // Indexing uses [Z * 64 + Y * 8 + X] and max size for each axis is 6. 
- static constexpr unsigned int MAX_DMI = 6 * 64 + 6 * 8 + 6; + static constexpr size_t MAX_DMI = 6 * 64 + 6 * 8 + 6; int decimation_mode_index[MAX_DMI]; - unsigned int decimation_mode_count = 0; + size_t decimation_mode_count = 0; dt_init_working_buffers* wb = new dt_init_working_buffers; @@ -1009,19 +1009,19 @@ static void construct_block_size_descriptor_3d( bsd.zdim = static_cast(z_texels); bsd.texel_count = static_cast(x_texels * y_texels * z_texels); - for (unsigned int i = 0; i < MAX_DMI; i++) + for (size_t i = 0; i < MAX_DMI; i++) { decimation_mode_index[i] = -1; } // gather all the infill-modes that can be used with the current block size - for (unsigned int x_weights = 2; x_weights <= x_texels; x_weights++) + for (size_t x_weights = 2; x_weights <= x_texels; x_weights++) { - for (unsigned int y_weights = 2; y_weights <= y_texels; y_weights++) + for (size_t y_weights = 2; y_weights <= y_texels; y_weights++) { - for (unsigned int z_weights = 2; z_weights <= z_texels; z_weights++) + for (size_t z_weights = 2; z_weights <= z_texels; z_weights++) { - unsigned int weight_count = x_weights * y_weights * z_weights; + size_t weight_count = x_weights * y_weights * z_weights; if (weight_count > BLOCK_MAX_WEIGHTS) { continue; @@ -1033,15 +1033,15 @@ static void construct_block_size_descriptor_3d( int maxprec_1plane = -1; int maxprec_2planes = -1; - for (unsigned int i = 0; i < 12; i++) + for (size_t i = 0; i < 12; i++) { - unsigned int bits_1plane = get_ise_sequence_bitcount(weight_count, static_cast(i)); + size_t bits_1plane = get_ise_sequence_bitcount(weight_count, static_cast(i)); if (bits_1plane >= BLOCK_MIN_WEIGHT_BITS && bits_1plane <= BLOCK_MAX_WEIGHT_BITS) { maxprec_1plane = i; } - unsigned int bits_2planes = get_ise_sequence_bitcount(2 * weight_count, static_cast(i)); + size_t bits_2planes = get_ise_sequence_bitcount(2 * weight_count, static_cast(i)); if (bits_2planes >= BLOCK_MIN_WEIGHT_BITS && bits_2planes <= BLOCK_MAX_WEIGHT_BITS) { maxprec_2planes = i; @@ -1063,7 +1063,7 @@ static void construct_block_size_descriptor_3d( } // Ensure the end of the array contains valid data (should never get read) - for (unsigned int i = decimation_mode_count; i < WEIGHTS_MAX_DECIMATION_MODES; i++) + for (size_t i = decimation_mode_count; i < WEIGHTS_MAX_DECIMATION_MODES; i++) { bsd.decimation_modes[i].maxprec_1plane = -1; bsd.decimation_modes[i].maxprec_2planes = -1; @@ -1078,20 +1078,20 @@ static void construct_block_size_descriptor_3d( // Construct the list of block formats referencing the decimation tables // Clear the list to a known-bad value - for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++) + for (size_t i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++) { bsd.block_mode_packed_index[i] = BLOCK_BAD_BLOCK_MODE; } - unsigned int packed_idx = 0; - unsigned int bm_counts[2] { 0 }; + size_t packed_idx = 0; + size_t bm_counts[2] { 0 }; // Iterate two times to build a usefully ordered list: // - Pass 0 - keep valid single plane block modes // - Pass 1 - keep valid dual plane block modes - for (unsigned int j = 0; j < 2; j++) + for (size_t j = 0; j < 2; j++) { - for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++) + for (size_t i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++) { // Skip modes we've already included in a previous pass if (bsd.block_mode_packed_index[i] != BLOCK_BAD_BLOCK_MODE) @@ -1099,12 +1099,12 @@ static void construct_block_size_descriptor_3d( continue; } - unsigned int x_weights; - unsigned int y_weights; - unsigned int z_weights; + size_t x_weights; + size_t y_weights; + size_t 
z_weights; bool is_dual_plane; - unsigned int quant_mode; - unsigned int weight_bits; + size_t quant_mode; + size_t weight_bits; bool valid = decode_block_mode_3d(i, x_weights, y_weights, z_weights, is_dual_plane, quant_mode, weight_bits); // Skip invalid encodings @@ -1163,11 +1163,11 @@ static void construct_block_size_descriptor_3d( /* See header for documentation. */ void init_block_size_descriptor( - unsigned int x_texels, - unsigned int y_texels, - unsigned int z_texels, + size_t x_texels, + size_t y_texels, + size_t z_texels, bool can_omit_modes, - unsigned int partition_count_cutoff, + size_t partition_count_cutoff, float mode_cutoff, block_size_descriptor& bsd ) { diff --git a/Source/astcenc_compress_symbolic.cpp b/Source/astcenc_compress_symbolic.cpp index 789eac19..8148c70c 100644 --- a/Source/astcenc_compress_symbolic.cpp +++ b/Source/astcenc_compress_symbolic.cpp @@ -37,10 +37,10 @@ static void merge_endpoints( const endpoints& ep_plane1, const endpoints& ep_plane2, - unsigned int component_plane2, + size_t component_plane2, endpoints& result ) { - unsigned int partition_count = ep_plane1.partition_count; + size_t partition_count = ep_plane1.partition_count; assert(partition_count == 1); vmask4 sep_mask = vint4::lane_id() == vint4(component_plane2); @@ -73,15 +73,15 @@ static bool realign_weights_undecimated( symbolic_compressed_block& scb ) { // Get the partition descriptor - unsigned int partition_count = scb.partition_count; + size_t partition_count = scb.partition_count; const auto& pi = bsd.get_partition_info(partition_count, scb.partition_index); // Get the quantization table const block_mode& bm = bsd.get_block_mode(scb.block_mode); - unsigned int weight_quant_level = bm.quant_mode; + size_t weight_quant_level = bm.quant_mode; const quant_and_transfer_table& qat = quant_and_xfer_tables[weight_quant_level]; - unsigned int max_plane = bm.is_dual_plane; + size_t max_plane = bm.is_dual_plane; int plane2_component = scb.plane2_component; vmask4 plane_mask = vint4::lane_id() == vint4(plane2_component); @@ -95,7 +95,7 @@ static bool realign_weights_undecimated( promise(partition_count > 0); - for (unsigned int pa_idx = 0; pa_idx < partition_count; pa_idx++) + for (size_t pa_idx = 0; pa_idx < partition_count; pa_idx++) { unpack_color_endpoints(decode_mode, scb.color_formats[pa_idx], @@ -109,9 +109,9 @@ static bool realign_weights_undecimated( bool adjustments = false; // For each plane and partition ... 
- for (unsigned int pl_idx = 0; pl_idx <= max_plane; pl_idx++) + for (size_t pl_idx = 0; pl_idx <= max_plane; pl_idx++) { - for (unsigned int pa_idx = 0; pa_idx < partition_count; pa_idx++) + for (size_t pa_idx = 0; pa_idx < partition_count; pa_idx++) { // Compute the endpoint delta for all components in current plane vint4 epd = endpnt1[pa_idx] - endpnt0[pa_idx]; @@ -123,7 +123,7 @@ static bool realign_weights_undecimated( // For each weight compute previous, current, and next errors promise(bsd.texel_count > 0); - for (unsigned int texel = 0; texel < bsd.texel_count; texel++) + for (size_t texel = 0; texel < bsd.texel_count; texel++) { int uqw = dec_weights_uquant[texel]; @@ -136,7 +136,7 @@ static bool realign_weights_undecimated( float weight_down = static_cast(uqw_down - uqw); float weight_up = static_cast(uqw_up - uqw); - unsigned int partition = pi.partition_of_texel[texel]; + size_t partition = pi.partition_of_texel[texel]; vfloat4 color_offset = offset[partition]; vfloat4 color_base = endpnt0f[partition]; @@ -192,20 +192,20 @@ static bool realign_weights_decimated( symbolic_compressed_block& scb ) { // Get the partition descriptor - unsigned int partition_count = scb.partition_count; + size_t partition_count = scb.partition_count; const auto& pi = bsd.get_partition_info(partition_count, scb.partition_index); // Get the quantization table const block_mode& bm = bsd.get_block_mode(scb.block_mode); - unsigned int weight_quant_level = bm.quant_mode; + size_t weight_quant_level = bm.quant_mode; const quant_and_transfer_table& qat = quant_and_xfer_tables[weight_quant_level]; // Get the decimation table const decimation_info& di = bsd.get_decimation_info(bm.decimation_mode); - unsigned int weight_count = di.weight_count; + size_t weight_count = di.weight_count; assert(weight_count != bsd.texel_count); - unsigned int max_plane = bm.is_dual_plane; + size_t max_plane = bm.is_dual_plane; int plane2_component = scb.plane2_component; vmask4 plane_mask = vint4::lane_id() == vint4(plane2_component); @@ -220,7 +220,7 @@ static bool realign_weights_decimated( promise(partition_count > 0); promise(weight_count > 0); - for (unsigned int pa_idx = 0; pa_idx < partition_count; pa_idx++) + for (size_t pa_idx = 0; pa_idx < partition_count; pa_idx++) { unpack_color_endpoints(decode_mode, scb.color_formats[pa_idx], @@ -234,9 +234,9 @@ static bool realign_weights_decimated( bool adjustments = false; // For each plane and partition ... 
- for (unsigned int pl_idx = 0; pl_idx <= max_plane; pl_idx++) + for (size_t pl_idx = 0; pl_idx <= max_plane; pl_idx++) { - for (unsigned int pa_idx = 0; pa_idx < partition_count; pa_idx++) + for (size_t pa_idx = 0; pa_idx < partition_count; pa_idx++) { // Compute the endpoint delta for all components in current plane vint4 epd = endpnt1[pa_idx] - endpnt0[pa_idx]; @@ -248,7 +248,7 @@ static bool realign_weights_decimated( // Create an unquantized weight grid for this decimation level ASTCENC_ALIGNAS float uq_weightsf[BLOCK_MAX_WEIGHTS]; - for (unsigned int we_idx = 0; we_idx < weight_count; we_idx += ASTCENC_SIMD_WIDTH) + for (size_t we_idx = 0; we_idx < weight_count; we_idx += ASTCENC_SIMD_WIDTH) { vint unquant_value(dec_weights_uquant + we_idx); vfloat unquant_valuef = int_to_float(unquant_value); @@ -256,7 +256,7 @@ static bool realign_weights_decimated( } // For each weight compute previous, current, and next errors - for (unsigned int we_idx = 0; we_idx < weight_count; we_idx++) + for (size_t we_idx = 0; we_idx < weight_count; we_idx++) { int uqw = dec_weights_uquant[we_idx]; uint32_t prev_and_next = qat.prev_next_values[uqw]; @@ -273,11 +273,11 @@ static bool realign_weights_decimated( vfloat4 error_upv = vfloat4::zero(); // Interpolate the colors to create the diffs - unsigned int texels_to_evaluate = di.weight_texel_count[we_idx]; + size_t texels_to_evaluate = di.weight_texel_count[we_idx]; promise(texels_to_evaluate > 0); - for (unsigned int te_idx = 0; te_idx < texels_to_evaluate; te_idx++) + for (size_t te_idx = 0; te_idx < texels_to_evaluate; te_idx++) { - unsigned int texel = di.weight_texels_tr[te_idx][we_idx]; + size_t texel = di.weight_texels_tr[te_idx][we_idx]; float tw_base = di.texel_contrib_for_weight[te_idx][we_idx]; @@ -293,7 +293,7 @@ static bool realign_weights_decimated( float weight_down = weight_base + uqw_diff_down * tw_base - weight_base; float weight_up = weight_base + uqw_diff_up * tw_base - weight_base; - unsigned int partition = pi.partition_of_texel[texel]; + size_t partition = pi.partition_of_texel[texel]; vfloat4 color_offset = offset[partition]; vfloat4 color_base = endpnt0f[partition]; @@ -356,8 +356,8 @@ static float compress_symbolic_block_for_partition_1plane( const image_block& blk, bool only_always, float tune_errorval_threshold, - unsigned int partition_count, - unsigned int partition_index, + size_t partition_count, + size_t partition_index, symbolic_compressed_block& scb, compression_working_buffers& tmpbuf, int quant_limit @@ -385,10 +385,10 @@ static float compress_symbolic_block_for_partition_1plane( uint8_t* dec_weights_uquant = tmpbuf.dec_weights_uquant; // For each decimation mode, compute an ideal set of weights with no quantization - unsigned int max_decimation_modes = only_always ? bsd.decimation_mode_count_always + size_t max_decimation_modes = only_always ? 
bsd.decimation_mode_count_always : bsd.decimation_mode_count_selected; promise(max_decimation_modes > 0); - for (unsigned int i = 0; i < max_decimation_modes; i++) + for (size_t i = 0; i < max_decimation_modes; i++) { const auto& dm = bsd.get_decimation_mode(i); if (!dm.is_ref_1plane(static_cast(max_weight_quant))) @@ -407,7 +407,7 @@ static float compress_symbolic_block_for_partition_1plane( // Compute maximum colors for the endpoints and ideal weights, then for each endpoint and ideal // weight pair, compute the smallest weight that will result in a color value greater than 1 vfloat4 min_ep(10.0f); - for (unsigned int i = 0; i < partition_count; i++) + for (size_t i = 0; i < partition_count; i++) { vfloat4 ep = (vfloat4(1.0f) - ei.ep.endpt0[i]) / (ei.ep.endpt1[i] - ei.ep.endpt0[i]); @@ -436,10 +436,10 @@ static float compress_symbolic_block_for_partition_1plane( 115 - 4, 111 - 4 - PARTITION_INDEX_BITS, 108 - 4 - PARTITION_INDEX_BITS, 105 - 4 - PARTITION_INDEX_BITS }; - unsigned int max_block_modes = only_always ? bsd.block_mode_count_1plane_always + size_t max_block_modes = only_always ? bsd.block_mode_count_1plane_always : bsd.block_mode_count_1plane_selected; promise(max_block_modes > 0); - for (unsigned int i = 0; i < max_block_modes; i++) + for (size_t i = 0; i < max_block_modes; i++) { const block_mode& bm = bsd.block_modes[i]; @@ -492,7 +492,7 @@ static float compress_symbolic_block_for_partition_1plane( quant_method color_quant_level[TUNE_MAX_TRIAL_CANDIDATES]; quant_method color_quant_level_mod[TUNE_MAX_TRIAL_CANDIDATES]; - unsigned int candidate_count = compute_ideal_endpoint_formats( + size_t candidate_count = compute_ideal_endpoint_formats( pi, blk, ei.ep, qwt_bitcounts, qwt_errors, config.tune_candidate_limit, 0, max_block_modes, partition_format_specifiers, block_mode_index, @@ -502,7 +502,7 @@ static float compress_symbolic_block_for_partition_1plane( float best_errorval_in_mode = ERROR_CALC_DEFAULT; float best_errorval_in_scb = scb.errorval; - for (unsigned int i = 0; i < candidate_count; i++) + for (size_t i = 0; i < candidate_count; i++) { TRACE_NODE(node0, "candidate"); @@ -528,12 +528,12 @@ static float compress_symbolic_block_for_partition_1plane( uint8_t* u8_weight_src = dec_weights_uquant + BLOCK_MAX_WEIGHTS * bm_packed_index; - for (unsigned int j = 0; j < di.weight_count; j++) + for (size_t j = 0; j < di.weight_count; j++) { workscb.weights[j] = u8_weight_src[j]; } - for (unsigned int l = 0; l < config.tune_refinement_limit; l++) + for (size_t l = 0; l < config.tune_refinement_limit; l++) { recompute_ideal_colors_1plane( blk, pi, di, workscb.weights, @@ -541,7 +541,7 @@ static float compress_symbolic_block_for_partition_1plane( // Quantize the chosen color, tracking if worth trying the mod value bool all_same = color_quant_level[i] != color_quant_level_mod[i]; - for (unsigned int j = 0; j < partition_count; j++) + for (size_t j = 0; j < partition_count; j++) { workscb.color_formats[j] = pack_color_endpoints( workep.endpt0[j], @@ -564,7 +564,7 @@ static float compress_symbolic_block_for_partition_1plane( uint8_t colorvals[BLOCK_MAX_PARTITIONS][8]; uint8_t color_formats_mod[BLOCK_MAX_PARTITIONS] { 0 }; bool all_same_mod = true; - for (unsigned int j = 0; j < partition_count; j++) + for (size_t j = 0; j < partition_count; j++) { color_formats_mod[j] = pack_color_endpoints( workep.endpt0[j], @@ -586,9 +586,9 @@ static float compress_symbolic_block_for_partition_1plane( if (all_same_mod) { workscb.color_formats_matched = 1; - for (unsigned int j = 0; j < 
BLOCK_MAX_PARTITIONS; j++) + for (size_t j = 0; j < BLOCK_MAX_PARTITIONS; j++) { - for (unsigned int k = 0; k < 8; k++) + for (size_t k = 0; k < 8; k++) { workscb.color_values[j][k] = colorvals[j][k]; } @@ -623,7 +623,7 @@ static float compress_symbolic_block_for_partition_1plane( // iteration can help more so we give it a extra 8% leeway. Use this knowledge to // drive a heuristic to skip blocks that are unlikely to catch up with the best // block we have already. - unsigned int iters_remaining = config.tune_refinement_limit - l; + size_t iters_remaining = config.tune_refinement_limit - l; float threshold = (0.045f * static_cast(iters_remaining)) + 1.08f; if (errorval > (threshold * best_errorval_in_scb)) { @@ -671,7 +671,7 @@ static float compress_symbolic_block_for_partition_1plane( // Average refinement improvement is 3.5% per iteration, so skip blocks that are // unlikely to catch up with the best block we have already. Assume a 4.5% per step to // give benefit of the doubt ... - unsigned int iters_remaining = config.tune_refinement_limit - 1 - l; + size_t iters_remaining = config.tune_refinement_limit - 1 - l; float threshold = (0.045f * static_cast(iters_remaining)) + 1.0f; if (errorval > (threshold * best_errorval_in_scb)) { @@ -718,7 +718,7 @@ static float compress_symbolic_block_for_partition_2planes( const block_size_descriptor& bsd, const image_block& blk, float tune_errorval_threshold, - unsigned int plane2_component, + size_t plane2_component, symbolic_compressed_block& scb, compression_working_buffers& tmpbuf, int quant_limit @@ -740,7 +740,7 @@ static float compress_symbolic_block_for_partition_2planes( uint8_t* dec_weights_uquant = tmpbuf.dec_weights_uquant; // For each decimation mode, compute an ideal set of weights with no quantization - for (unsigned int i = 0; i < bsd.decimation_mode_count_selected; i++) + for (size_t i = 0; i < bsd.decimation_mode_count_selected; i++) { const auto& dm = bsd.get_decimation_mode(i); if (!dm.is_ref_2plane(static_cast(max_weight_quant))) @@ -801,10 +801,10 @@ static float compress_symbolic_block_for_partition_2planes( int8_t* qwt_bitcounts = tmpbuf.qwt_bitcounts; float* qwt_errors = tmpbuf.qwt_errors; - unsigned int start_2plane = bsd.block_mode_count_1plane_selected; - unsigned int end_2plane = bsd.block_mode_count_1plane_2plane_selected; + size_t start_2plane = bsd.block_mode_count_1plane_selected; + size_t end_2plane = bsd.block_mode_count_1plane_2plane_selected; - for (unsigned int i = start_2plane; i < end_2plane; i++) + for (size_t i = start_2plane; i < end_2plane; i++) { const block_mode& bm = bsd.block_modes[i]; assert(bm.is_dual_plane); @@ -827,7 +827,7 @@ static float compress_symbolic_block_for_partition_2planes( weight_high_value2[i] = 1.0f; } - unsigned int decimation_mode = bm.decimation_mode; + size_t decimation_mode = bm.decimation_mode; const auto& di = bsd.get_decimation_info(decimation_mode); ASTCENC_ALIGNAS float dec_weights_uquantf[BLOCK_MAX_WEIGHTS]; @@ -871,7 +871,7 @@ static float compress_symbolic_block_for_partition_2planes( merge_endpoints(ei1.ep, ei2.ep, plane2_component, epm); const auto& pi = bsd.get_partition_info(1, 0); - unsigned int candidate_count = compute_ideal_endpoint_formats( + size_t candidate_count = compute_ideal_endpoint_formats( pi, blk, epm, qwt_bitcounts, qwt_errors, config.tune_candidate_limit, bsd.block_mode_count_1plane_selected, bsd.block_mode_count_1plane_2plane_selected, @@ -882,7 +882,7 @@ static float compress_symbolic_block_for_partition_2planes( float best_errorval_in_mode = 
ERROR_CALC_DEFAULT; float best_errorval_in_scb = scb.errorval; - for (unsigned int i = 0; i < candidate_count; i++) + for (size_t i = 0; i < candidate_count; i++) { TRACE_NODE(node0, "candidate"); @@ -915,7 +915,7 @@ static float compress_symbolic_block_for_partition_2planes( workscb.weights[j + WEIGHTS_PLANE2_OFFSET] = u8_weight2_src[j]; } - for (unsigned int l = 0; l < config.tune_refinement_limit; l++) + for (size_t l = 0; l < config.tune_refinement_limit; l++) { recompute_ideal_colors_2planes( blk, bsd, di, @@ -957,7 +957,7 @@ static float compress_symbolic_block_for_partition_2planes( // iteration can help more so we give it a extra 8% leeway. Use this knowledge to // drive a heuristic to skip blocks that are unlikely to catch up with the best // block we have already. - unsigned int iters_remaining = config.tune_refinement_limit - l; + size_t iters_remaining = config.tune_refinement_limit - l; float threshold = (0.045f * static_cast(iters_remaining)) + 1.08f; if (errorval > (threshold * best_errorval_in_scb)) { @@ -1006,7 +1006,7 @@ static float compress_symbolic_block_for_partition_2planes( // Average refinement improvement is 3.5% per iteration, so skip blocks that are // unlikely to catch up with the best block we have already. Assume a 4.5% per step to // give benefit of the doubt ... - unsigned int iters_remaining = config.tune_refinement_limit - 1 - l; + size_t iters_remaining = config.tune_refinement_limit - 1 - l; float threshold = (0.045f * static_cast(iters_remaining)) + 1.0f; if (errorval > (threshold * best_errorval_in_scb)) { @@ -1187,13 +1187,13 @@ void compress_block( bool block_skip_two_plane = false; int max_partitions = ctx.config.tune_partition_count_limit; - unsigned int requested_partition_indices[3] { + size_t requested_partition_indices[3] { ctx.config.tune_2partition_index_limit, ctx.config.tune_3partition_index_limit, ctx.config.tune_4partition_index_limit }; - unsigned int requested_partition_trials[3] { + size_t requested_partition_trials[3] { ctx.config.tune_2partitioning_candidate_limit, ctx.config.tune_3partitioning_candidate_limit, ctx.config.tune_4partitioning_candidate_limit @@ -1369,19 +1369,19 @@ void compress_block( // Find best blocks for 2, 3 and 4 partitions for (int partition_count = 2; partition_count <= max_partitions; partition_count++) { - unsigned int partition_indices[TUNE_MAX_PARTITIONING_CANDIDATES]; + size_t partition_indices[TUNE_MAX_PARTITIONING_CANDIDATES]; - unsigned int requested_indices = requested_partition_indices[partition_count - 2]; + size_t requested_indices = requested_partition_indices[partition_count - 2]; - unsigned int requested_trials = requested_partition_trials[partition_count - 2]; + size_t requested_trials = requested_partition_trials[partition_count - 2]; requested_trials = astc::min(requested_trials, requested_indices); - unsigned int actual_trials = find_best_partition_candidates( + size_t actual_trials = find_best_partition_candidates( bsd, blk, partition_count, requested_indices, partition_indices, requested_trials); float best_error_in_prev = best_errorvals_for_pcount[partition_count - 2]; - for (unsigned int i = 0; i < actual_trials; i++) + for (size_t i = 0; i < actual_trials; i++) { TRACE_NODE(node1, "pass"); trace_add_data("partition_count", partition_count); diff --git a/Source/astcenc_compute_variance.cpp b/Source/astcenc_compute_variance.cpp index 48a4af8c..5a3d9ba2 100644 --- a/Source/astcenc_compute_variance.cpp +++ b/Source/astcenc_compute_variance.cpp @@ -420,26 +420,26 @@ void 
compute_pixel_region_variance( } /* See header for documentation. */ -unsigned int init_compute_averages( +size_t init_compute_averages( const astcenc_image& img, - unsigned int alpha_kernel_radius, + size_t alpha_kernel_radius, const astcenc_swizzle& swz, avg_args& ag ) { - unsigned int size_x = img.dim_x; - unsigned int size_y = img.dim_y; - unsigned int size_z = img.dim_z; + size_t size_x = img.dim_x; + size_t size_y = img.dim_y; + size_t size_z = img.dim_z; // Compute maximum block size and from that the working memory buffer size - unsigned int kernel_radius = alpha_kernel_radius; - unsigned int kerneldim = 2 * kernel_radius + 1; + size_t kernel_radius = alpha_kernel_radius; + size_t kerneldim = 2 * kernel_radius + 1; bool have_z = (size_z > 1); - unsigned int max_blk_size_xy = have_z ? 16 : 32; - unsigned int max_blk_size_z = astc::min(size_z, have_z ? 16u : 1u); + size_t max_blk_size_xy = have_z ? 16 : 32; + size_t max_blk_size_z = astc::min(size_z, have_z ? 16_z : 1_z); - unsigned int max_padsize_xy = max_blk_size_xy + kerneldim; - unsigned int max_padsize_z = max_blk_size_z + (have_z ? kerneldim : 0); + size_t max_padsize_xy = max_blk_size_xy + kerneldim; + size_t max_padsize_z = max_blk_size_z + (have_z ? kerneldim : 0); // Perform block-wise averages calculations across the image // Initialize fields which are not populated until later @@ -464,8 +464,8 @@ unsigned int init_compute_averages( ag.work_memory_size = 2 * max_padsize_xy * max_padsize_xy * max_padsize_z; // The parallel task count - unsigned int z_tasks = (size_z + max_blk_size_z - 1) / max_blk_size_z; - unsigned int y_tasks = (size_y + max_blk_size_xy - 1) / max_blk_size_xy; + size_t z_tasks = (size_z + max_blk_size_z - 1) / max_blk_size_z; + size_t y_tasks = (size_y + max_blk_size_xy - 1) / max_blk_size_xy; return z_tasks * y_tasks; } diff --git a/Source/astcenc_decompress_symbolic.cpp b/Source/astcenc_decompress_symbolic.cpp index e7791eef..a54c15e2 100644 --- a/Source/astcenc_decompress_symbolic.cpp +++ b/Source/astcenc_decompress_symbolic.cpp @@ -101,7 +101,7 @@ void unpack_weights( vtable_64x8 table; vtable_prepare(table, scb.weights); - for (unsigned int i = 0; i < bsd.texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < bsd.texel_count; i += ASTCENC_SIMD_WIDTH) { vint summed_value(8); vint weight_count(di.texel_weight_count + i); @@ -130,7 +130,7 @@ void unpack_weights( vtable_32x8 tab_plane2; vtable_prepare(tab_plane2, scb.weights + 32); - for (unsigned int i = 0; i < bsd.texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < bsd.texel_count; i += ASTCENC_SIMD_WIDTH) { vint sum_plane1(8); vint sum_plane2(8); @@ -190,7 +190,7 @@ void decompress_symbolic_block( // If we detected an error-block, blow up immediately. 
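// The task and block counts in init_compute_averages above, and the xblocks/yblocks/zblocks
// counts in the compress/decompress entry points later in this patch, all use the same
// ceiling-division idiom, (size + block - 1) / block. A minimal standalone sketch of that
// idiom, not part of this patch and using a hypothetical helper name:
#include <cstddef>

static inline size_t ceil_div(size_t size, size_t block)
{
	// Round up so a partially covered trailing block still gets its own task
	return (size + block - 1) / block;
}

// Example: ceil_div(67, 32) == 3; the third task covers the 3-texel remainder.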
if (scb.block_type == SYM_BTYPE_ERROR) { - for (unsigned int i = 0; i < bsd.texel_count; i++) + for (size_t i = 0; i < bsd.texel_count; i++) { blk.data_r[i] = error_color_nan(); blk.data_g[i] = error_color_nan(); @@ -243,7 +243,7 @@ void decompress_symbolic_block( } } - for (unsigned int i = 0; i < bsd.texel_count; i++) + for (size_t i = 0; i < bsd.texel_count; i++) { blk.data_r[i] = color.lane<0>(); blk.data_g[i] = color.lane<1>(); @@ -356,8 +356,8 @@ float compute_symbolic_block_difference_2plane( vmask4 u8_mask = get_u8_component_mask(config.profile, blk); // Unpack and compute error for each texel in the partition - unsigned int texel_count = bsd.texel_count; - for (unsigned int i = 0; i < texel_count; i++) + size_t texel_count = bsd.texel_count; + for (size_t i = 0; i < texel_count; i++) { vint4 weight = select(vint4(plane1_weights[i]), vint4(plane2_weights[i]), plane2_mask); vint4 colori = lerp_color_int(u8_mask, ep0, ep1, weight); @@ -423,7 +423,7 @@ float compute_symbolic_block_difference_1plane( assert(scb.block_mode >= 0); // Get the appropriate partition-table entry - unsigned int partition_count = scb.partition_count; + size_t partition_count = scb.partition_count; const auto& pi = bsd.get_partition_info(partition_count, scb.partition_index); // Get the appropriate block descriptor @@ -437,7 +437,7 @@ float compute_symbolic_block_difference_1plane( vmask4 u8_mask = get_u8_component_mask(config.profile, blk); vfloat4 summa = vfloat4::zero(); - for (unsigned int i = 0; i < partition_count; i++) + for (size_t i = 0; i < partition_count; i++) { // Decode the color endpoints for this partition vint4 ep0; @@ -452,10 +452,10 @@ float compute_symbolic_block_difference_1plane( ep0, ep1); // Unpack and compute error for each texel in the partition - unsigned int texel_count = pi.partition_texel_count[i]; - for (unsigned int j = 0; j < texel_count; j++) + size_t texel_count = pi.partition_texel_count[i]; + for (size_t j = 0; j < texel_count; j++) { - unsigned int tix = pi.texels_of_partition[i][j]; + size_t tix = pi.texels_of_partition[i][j]; vint4 colori = lerp_color_int(u8_mask, ep0, ep1, vint4(plane1_weights[tix])); @@ -546,8 +546,8 @@ float compute_symbolic_block_difference_1plane_1partition( vint lane_id = vint::lane_id(); - unsigned int texel_count = bsd.texel_count; - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + size_t texel_count = bsd.texel_count; + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { // Compute EP1 contribution vint weight1 = vint::loada(plane1_weights + i); diff --git a/Source/astcenc_diagnostic_trace.cpp b/Source/astcenc_diagnostic_trace.cpp index bcd6fa72..272ab7f2 100644 --- a/Source/astcenc_diagnostic_trace.cpp +++ b/Source/astcenc_diagnostic_trace.cpp @@ -236,7 +236,7 @@ void trace_add_data( /* See header for documentation. */ void trace_add_data( const char* key, - unsigned int value + size_t value ) { TraceNode* node = g_TraceLog->get_current_leaf(); node->add_attrib("int", key, std::to_string(value)); diff --git a/Source/astcenc_diagnostic_trace.h b/Source/astcenc_diagnostic_trace.h index f5586b0a..c4a0125f 100644 --- a/Source/astcenc_diagnostic_trace.h +++ b/Source/astcenc_diagnostic_trace.h @@ -111,7 +111,7 @@ class TraceNode /** * @brief The number of attributes and child nodes in this node. 
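// The 1-plane difference function above walks each partition's texels through the
// texels_of_partition index list rather than scanning the whole block. A simplified,
// self-contained sketch of that indirection; the struct and function names here are
// illustrative only and are not the codec's real types:
#include <cstddef>
#include <cstdint>

struct partition_info_sketch
{
	size_t partition_count;                  // 1..4 partitions
	size_t partition_texel_count[4];         // texels owned by each partition
	const uint8_t* texels_of_partition[4];   // per-partition texel index lists
};

static float sum_squared_error(
	const partition_info_sketch& pi,
	const float* decoded,    // per-texel decoded values
	const float* original)   // per-texel original values
{
	float summa = 0.0f;
	for (size_t i = 0; i < pi.partition_count; i++)
	{
		for (size_t j = 0; j < pi.partition_texel_count[i]; j++)
		{
			size_t tix = pi.texels_of_partition[i][j];
			float diff = decoded[tix] - original[tix];
			summa += diff * diff;
		}
	}
	return summa;
}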
*/ - unsigned int m_attrib_count { 0 }; + size_t m_attrib_count { 0 }; }; /** @@ -201,12 +201,12 @@ void trace_add_data(const char* key, float value); void trace_add_data(const char* key, int value); /** - * @brief Add an unsigned integer annotation to the current node. + * @brief Add a size_t annotation to the current node. * * @param key The name of the attribute. * @param value The value of the attribute. */ -void trace_add_data(const char* key, unsigned int value); +void trace_add_data(const char* key, size_t value); #else diff --git a/Source/astcenc_entry.cpp b/Source/astcenc_entry.cpp index 4023797a..3af908be 100644 --- a/Source/astcenc_entry.cpp +++ b/Source/astcenc_entry.cpp @@ -39,16 +39,16 @@ struct astcenc_preset_config { float quality; - unsigned int tune_partition_count_limit; - unsigned int tune_2partition_index_limit; - unsigned int tune_3partition_index_limit; - unsigned int tune_4partition_index_limit; - unsigned int tune_block_mode_limit; - unsigned int tune_refinement_limit; - unsigned int tune_candidate_limit; - unsigned int tune_2partitioning_candidate_limit; - unsigned int tune_3partitioning_candidate_limit; - unsigned int tune_4partitioning_candidate_limit; + size_t tune_partition_count_limit; + size_t tune_2partition_index_limit; + size_t tune_3partition_index_limit; + size_t tune_4partition_index_limit; + size_t tune_block_mode_limit; + size_t tune_refinement_limit; + size_t tune_candidate_limit; + size_t tune_2partitioning_candidate_limit; + size_t tune_3partitioning_candidate_limit; + size_t tune_4partitioning_candidate_limit; float tune_db_limit_a_base; float tune_db_limit_b_base; float tune_mse_overshoot; @@ -192,9 +192,9 @@ static astcenc_error validate_profile( * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
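// The _z literal suffix introduced by this patch (1_z, 4_z, 16_z, 100_z in the hunks above
// and below) is not defined in any hunk shown here; it is presumably a user-defined literal
// returning size_t so that astc::min()/astc::clamp() deduce a single argument type. One
// possible definition, purely illustrative:
#include <cstddef>

constexpr size_t operator""_z(unsigned long long value)
{
	return static_cast<size_t>(value);
}

// With this in scope, an expression such as astc::min(size_z, have_z ? 16_z : 1_z)
// compares size_t against size_t instead of mixing size_t and unsigned int.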
*/ static astcenc_error validate_block_size( - unsigned int block_x, - unsigned int block_y, - unsigned int block_z + size_t block_x, + size_t block_y, + size_t block_z ) { // Test if this is a legal block size at all bool is_legal = (((block_z <= 1) && is_legal_2d_block_size(block_x, block_y)) || @@ -224,10 +224,10 @@ static astcenc_error validate_block_size( */ static astcenc_error validate_flags( astcenc_profile profile, - unsigned int flags + size_t flags ) { // Flags field must not contain any unknown flag bits - unsigned int exMask = ~ASTCENC_ALL_FLAGS; + size_t exMask = ~ASTCENC_ALL_FLAGS; if (popcount(flags & exMask) != 0) { return ASTCENC_ERR_BAD_FLAGS; @@ -396,16 +396,16 @@ static astcenc_error validate_config( config.rgbm_m_scale = astc::max(config.rgbm_m_scale, 1.0f); - config.tune_partition_count_limit = astc::clamp(config.tune_partition_count_limit, 1u, 4u); - config.tune_2partition_index_limit = astc::clamp(config.tune_2partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS); - config.tune_3partition_index_limit = astc::clamp(config.tune_3partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS); - config.tune_4partition_index_limit = astc::clamp(config.tune_4partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS); - config.tune_block_mode_limit = astc::clamp(config.tune_block_mode_limit, 1u, 100u); - config.tune_refinement_limit = astc::max(config.tune_refinement_limit, 1u); - config.tune_candidate_limit = astc::clamp(config.tune_candidate_limit, 1u, TUNE_MAX_TRIAL_CANDIDATES); - config.tune_2partitioning_candidate_limit = astc::clamp(config.tune_2partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES); - config.tune_3partitioning_candidate_limit = astc::clamp(config.tune_3partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES); - config.tune_4partitioning_candidate_limit = astc::clamp(config.tune_4partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES); + config.tune_partition_count_limit = astc::clamp(config.tune_partition_count_limit, 1_z, 4_z); + config.tune_2partition_index_limit = astc::clamp(config.tune_2partition_index_limit, 1_z, BLOCK_MAX_PARTITIONINGS); + config.tune_3partition_index_limit = astc::clamp(config.tune_3partition_index_limit, 1_z, BLOCK_MAX_PARTITIONINGS); + config.tune_4partition_index_limit = astc::clamp(config.tune_4partition_index_limit, 1_z, BLOCK_MAX_PARTITIONINGS); + config.tune_block_mode_limit = astc::clamp(config.tune_block_mode_limit, 1_z, 100_z); + config.tune_refinement_limit = astc::max(config.tune_refinement_limit, 1_z); + config.tune_candidate_limit = astc::clamp(config.tune_candidate_limit, 1_z, TUNE_MAX_TRIAL_CANDIDATES); + config.tune_2partitioning_candidate_limit = astc::clamp(config.tune_2partitioning_candidate_limit, 1_z, TUNE_MAX_PARTITIONING_CANDIDATES); + config.tune_3partitioning_candidate_limit = astc::clamp(config.tune_3partitioning_candidate_limit, 1_z, TUNE_MAX_PARTITIONING_CANDIDATES); + config.tune_4partitioning_candidate_limit = astc::clamp(config.tune_4partitioning_candidate_limit, 1_z, TUNE_MAX_PARTITIONING_CANDIDATES); config.tune_db_limit = astc::max(config.tune_db_limit, 0.0f); config.tune_mse_overshoot = astc::max(config.tune_mse_overshoot, 1.0f); config.tune_2partition_early_out_limit_factor = astc::max(config.tune_2partition_early_out_limit_factor, 0.0f); @@ -435,11 +435,11 @@ static astcenc_error validate_config( /* See header for documentation. 
*/ astcenc_error astcenc_config_init( astcenc_profile profile, - unsigned int block_x, - unsigned int block_y, - unsigned int block_z, + size_t block_x, + size_t block_y, + size_t block_z, float quality, - unsigned int flags, + size_t flags, astcenc_config* configp ) { astcenc_error status; @@ -455,7 +455,7 @@ astcenc_error astcenc_config_init( std::memset(&config, 0, sizeof(config)); // Process the block size - block_z = astc::max(block_z, 1u); // For 2D blocks Z==0 is accepted, but convert to 1 + block_z = astc::max(block_z, 1_z); // For 2D blocks Z==0 is accepted, but convert to 1 status = validate_block_size(block_x, block_y, block_z); if (status != ASTCENC_SUCCESS) { @@ -546,7 +546,7 @@ astcenc_error astcenc_config_init( #define LERPI(param) astc::flt2int_rtn(\ (static_cast(node_a.param) * wt_node_a) + \ (static_cast(node_b.param) * wt_node_b)) - #define LERPUI(param) static_cast(LERPI(param)) + #define LERPUI(param) static_cast(LERPI(param)) config.tune_partition_count_limit = LERPI(tune_partition_count_limit); config.tune_2partition_index_limit = LERPI(tune_2partition_index_limit); @@ -612,7 +612,7 @@ astcenc_error astcenc_config_init( // Normal map encoding uses L+A blocks, so allow one more partitioning // than normal. We need need fewer bits for endpoints, so more likely // to be able to use more partitions than an RGB/RGBA block - config.tune_partition_count_limit = astc::min(config.tune_partition_count_limit + 1u, 4u); + config.tune_partition_count_limit = astc::min(config.tune_partition_count_limit + 1_z, 4_z); config.cw_g_weight = 0.0f; config.cw_b_weight = 0.0f; @@ -656,7 +656,7 @@ astcenc_error astcenc_config_init( /* See header for documentation. */ astcenc_error astcenc_context_alloc( const astcenc_config* configp, - unsigned int thread_count, + size_t thread_count, astcenc_context** context ) { astcenc_error status; @@ -790,7 +790,7 @@ void astcenc_context_free( */ static void compress_image( astcenc_context& ctxo, - unsigned int thread_index, + size_t thread_index, const astcenc_image& image, const astcenc_swizzle& swizzle, uint8_t* buffer @@ -851,18 +851,18 @@ static void compress_image( // All threads run this processing loop until there is no work remaining while (true) { - unsigned int count; - unsigned int base = ctxo.manage_compress.get_task_assignment(16, count); + size_t count; + size_t base = ctxo.manage_compress.get_task_assignment(16, count); if (!count) { break; } - for (unsigned int i = base; i < base + count; i++) + for (size_t i = base; i < base + count; i++) { // Decode i into x, y, z block indices int z = i / plane_blocks; - unsigned int rem = i - (z * plane_blocks); + size_t rem = i - (z * plane_blocks); int y = rem / row_blocks; int x = rem - (y * row_blocks); @@ -969,14 +969,14 @@ static void compute_averages( // All threads run this processing loop until there is no work remaining while (true) { - unsigned int count; - unsigned int base = ctx.manage_avg.get_task_assignment(16, count); + size_t count; + size_t base = ctx.manage_avg.get_task_assignment(16, count); if (!count) { break; } - for (unsigned int i = base; i < base + count; i++) + for (size_t i = base; i < base + count; i++) { int z = (i / (y_tasks)) * step_z; int y = (i - (z * y_tasks)) * step_xy; @@ -1010,7 +1010,7 @@ astcenc_error astcenc_compress_image( const astcenc_swizzle* swizzle, uint8_t* data_out, size_t data_len, - unsigned int thread_index + size_t thread_index ) { #if defined(ASTCENC_DECOMPRESS_ONLY) (void)ctxo; @@ -1041,13 +1041,13 @@ astcenc_error astcenc_compress_image( 
return ASTCENC_ERR_BAD_PARAM; } - unsigned int block_x = ctx->config.block_x; - unsigned int block_y = ctx->config.block_y; - unsigned int block_z = ctx->config.block_z; + size_t block_x = ctx->config.block_x; + size_t block_y = ctx->config.block_y; + size_t block_z = ctx->config.block_z; - unsigned int xblocks = (image.dim_x + block_x - 1) / block_x; - unsigned int yblocks = (image.dim_y + block_y - 1) / block_y; - unsigned int zblocks = (image.dim_z + block_z - 1) / block_z; + size_t xblocks = (image.dim_x + block_x - 1) / block_x; + size_t yblocks = (image.dim_y + block_y - 1) / block_y; + size_t zblocks = (image.dim_z + block_z - 1) / block_z; // Check we have enough output space (16 bytes per block) size_t size_needed = xblocks * yblocks * zblocks * 16; @@ -1153,7 +1153,7 @@ astcenc_error astcenc_decompress_image( size_t data_len, astcenc_image* image_outp, const astcenc_swizzle* swizzle, - unsigned int thread_index + size_t thread_index ) { astcenc_error status; astcenc_image& image_out = *image_outp; @@ -1171,14 +1171,14 @@ astcenc_error astcenc_decompress_image( return status; } - unsigned int block_x = ctx->config.block_x; - unsigned int block_y = ctx->config.block_y; - unsigned int block_z = ctx->config.block_z; + size_t block_x = ctx->config.block_x; + size_t block_y = ctx->config.block_y; + size_t block_z = ctx->config.block_z; - unsigned int xblocks = (image_out.dim_x + block_x - 1) / block_x; - unsigned int yblocks = (image_out.dim_y + block_y - 1) / block_y; - unsigned int zblocks = (image_out.dim_z + block_z - 1) / block_z; - unsigned int block_count = zblocks * yblocks * xblocks; + size_t xblocks = (image_out.dim_x + block_x - 1) / block_x; + size_t yblocks = (image_out.dim_y + block_y - 1) / block_y; + size_t zblocks = (image_out.dim_z + block_z - 1) / block_z; + size_t block_count = zblocks * yblocks * xblocks; int row_blocks = xblocks; int plane_blocks = xblocks * yblocks; @@ -1208,22 +1208,22 @@ astcenc_error astcenc_decompress_image( // All threads run this processing loop until there is no work remaining while (true) { - unsigned int count; - unsigned int base = ctxo->manage_decompress.get_task_assignment(128, count); + size_t count; + size_t base = ctxo->manage_decompress.get_task_assignment(128, count); if (!count) { break; } - for (unsigned int i = base; i < base + count; i++) + for (size_t i = base; i < base + count; i++) { // Decode i into x, y, z block indices int z = i / plane_blocks; - unsigned int rem = i - (z * plane_blocks); + size_t rem = i - (z * plane_blocks); int y = rem / row_blocks; int x = rem - (y * row_blocks); - unsigned int offset = (((z * yblocks + y) * xblocks) + x) * 16; + size_t offset = (((z * yblocks + y) * xblocks) + x) * 16; const uint8_t* bp = data + offset; symbolic_compressed_block scb; @@ -1320,7 +1320,7 @@ astcenc_error astcenc_get_block_info( info->weight_level_count = get_quant_level(bm.get_weight_quant_mode()); // Unpack color endpoints for each active partition - for (unsigned int i = 0; i < scb.partition_count; i++) + for (size_t i = 0; i < scb.partition_count; i++) { bool rgb_hdr; bool a_hdr; @@ -1352,7 +1352,7 @@ astcenc_error astcenc_get_block_info( int weight_plane2[BLOCK_MAX_TEXELS]; unpack_weights(bsd, scb, di, bm.is_dual_plane, weight_plane1, weight_plane2); - for (unsigned int i = 0; i < bsd.texel_count; i++) + for (size_t i = 0; i < bsd.texel_count; i++) { info->weight_values_plane1[i] = static_cast(weight_plane1[i]) * (1.0f / WEIGHTS_TEXEL_SUM); if (info->is_dual_plane_block) @@ -1362,7 +1362,7 @@ astcenc_error 
astcenc_get_block_info( } // Unpack partition assignments for each texel - for (unsigned int i = 0; i < bsd.texel_count; i++) + for (size_t i = 0; i < bsd.texel_count; i++) { info->partition_assignment[i] = pi.partition_of_texel[i]; } diff --git a/Source/astcenc_find_best_partitioning.cpp b/Source/astcenc_find_best_partitioning.cpp index f2e43282..da4eb9fb 100644 --- a/Source/astcenc_find_best_partitioning.cpp +++ b/Source/astcenc_find_best_partitioning.cpp @@ -59,25 +59,25 @@ */ static void kmeans_init( const image_block& blk, - unsigned int texel_count, - unsigned int partition_count, + size_t texel_count, + size_t partition_count, vfloat4 cluster_centers[BLOCK_MAX_PARTITIONS] ) { promise(texel_count > 0); promise(partition_count > 0); - unsigned int clusters_selected = 0; + size_t clusters_selected = 0; float distances[BLOCK_MAX_TEXELS]; // Pick a random sample as first cluster center; 145897 from random.org - unsigned int sample = 145897 % texel_count; + size_t sample = 145897 % texel_count; vfloat4 center_color = blk.texel(sample); cluster_centers[clusters_selected] = center_color; clusters_selected++; // Compute the distance to the first cluster center float distance_sum = 0.0f; - for (unsigned int i = 0; i < texel_count; i++) + for (size_t i = 0; i < texel_count; i++) { vfloat4 color = blk.texel(i); vfloat4 diff = color - center_color; @@ -93,7 +93,7 @@ static void kmeans_init( 0.347661f, 0.731960f, 0.156391f }; - unsigned int cutoff = (clusters_selected - 1) + 3 * (partition_count - 2); + size_t cutoff = (clusters_selected - 1) + 3 * (partition_count - 2); // Pick the remaining samples as needed while (true) @@ -122,7 +122,7 @@ static void kmeans_init( // Compute the distance to the new cluster center, keep the min dist distance_sum = 0.0f; - for (unsigned int i = 0; i < texel_count; i++) + for (size_t i = 0; i < texel_count; i++) { vfloat4 color = blk.texel(i); vfloat4 diff = color - center_color; @@ -145,8 +145,8 @@ static void kmeans_init( */ static void kmeans_assign( const image_block& blk, - unsigned int texel_count, - unsigned int partition_count, + size_t texel_count, + size_t partition_count, const vfloat4 cluster_centers[BLOCK_MAX_PARTITIONS], uint8_t partition_of_texel[BLOCK_MAX_TEXELS] ) { @@ -156,13 +156,13 @@ static void kmeans_assign( uint8_t partition_texel_count[BLOCK_MAX_PARTITIONS] { 0 }; // Find the best partition for every texel - for (unsigned int i = 0; i < texel_count; i++) + for (size_t i = 0; i < texel_count; i++) { float best_distance = std::numeric_limits::max(); - unsigned int best_partition = 0; + size_t best_partition = 0; vfloat4 color = blk.texel(i); - for (unsigned int j = 0; j < partition_count; j++) + for (size_t j = 0; j < partition_count; j++) { vfloat4 diff = color - cluster_centers[j]; float distance = dot_s(diff * diff, blk.channel_weight); @@ -185,7 +185,7 @@ static void kmeans_assign( do { problem_case = false; - for (unsigned int i = 0; i < partition_count; i++) + for (size_t i = 0; i < partition_count; i++) { if (partition_texel_count[i] == 0) { @@ -209,8 +209,8 @@ static void kmeans_assign( */ static void kmeans_update( const image_block& blk, - unsigned int texel_count, - unsigned int partition_count, + size_t texel_count, + size_t partition_count, vfloat4 cluster_centers[BLOCK_MAX_PARTITIONS], const uint8_t partition_of_texel[BLOCK_MAX_TEXELS] ) { @@ -227,7 +227,7 @@ static void kmeans_update( uint8_t partition_texel_count[BLOCK_MAX_PARTITIONS] { 0 }; // Find the center-of-gravity in each cluster - for (unsigned int i = 0; i < 
texel_count; i++) + for (size_t i = 0; i < texel_count; i++) { uint8_t partition = partition_of_texel[i]; color_sum[partition] += blk.texel(i); @@ -235,7 +235,7 @@ static void kmeans_update( } // Set the center of gravity to be the new cluster center - for (unsigned int i = 0; i < partition_count; i++) + for (size_t i = 0; i < partition_count; i++) { float scale = 1.0f / static_cast(partition_texel_count[i]); cluster_centers[i] = color_sum[i] * scale; @@ -352,7 +352,7 @@ static inline uint8_t partition_mismatch4( return static_cast(astc::min(v0, v1, v2, v3) / 2); } -using mismatch_dispatch = unsigned int (*)(const uint64_t*, const uint64_t*); +using mismatch_dispatch = size_t (*)(const uint64_t*, const uint64_t*); /** * @brief Count the partition table mismatches vs the data clustering. @@ -364,16 +364,16 @@ using mismatch_dispatch = unsigned int (*)(const uint64_t*, const uint64_t*); */ static void count_partition_mismatch_bits( const block_size_descriptor& bsd, - unsigned int partition_count, + size_t partition_count, const uint64_t bitmaps[BLOCK_MAX_PARTITIONS], uint8_t mismatch_counts[BLOCK_MAX_PARTITIONINGS] ) { - unsigned int active_count = bsd.partitioning_count_selected[partition_count - 1]; + size_t active_count = bsd.partitioning_count_selected[partition_count - 1]; promise(active_count > 0); if (partition_count == 2) { - for (unsigned int i = 0; i < active_count; i++) + for (size_t i = 0; i < active_count; i++) { mismatch_counts[i] = partition_mismatch2(bitmaps, bsd.coverage_bitmaps_2[i]); assert(mismatch_counts[i] < BLOCK_MAX_KMEANS_TEXELS); @@ -382,7 +382,7 @@ static void count_partition_mismatch_bits( } else if (partition_count == 3) { - for (unsigned int i = 0; i < active_count; i++) + for (size_t i = 0; i < active_count; i++) { mismatch_counts[i] = partition_mismatch3(bitmaps, bsd.coverage_bitmaps_3[i]); assert(mismatch_counts[i] < BLOCK_MAX_KMEANS_TEXELS); @@ -391,7 +391,7 @@ static void count_partition_mismatch_bits( } else { - for (unsigned int i = 0; i < active_count; i++) + for (size_t i = 0; i < active_count; i++) { mismatch_counts[i] = partition_mismatch4(bitmaps, bsd.coverage_bitmaps_4[i]); assert(mismatch_counts[i] < BLOCK_MAX_KMEANS_TEXELS); @@ -409,9 +409,9 @@ static void count_partition_mismatch_bits( * * @return The number of active partitions in this selection. */ -static unsigned int get_partition_ordering_by_mismatch_bits( - unsigned int texel_count, - unsigned int partitioning_count, +static size_t get_partition_ordering_by_mismatch_bits( + size_t texel_count, + size_t partitioning_count, const uint8_t mismatch_count[BLOCK_MAX_PARTITIONINGS], uint16_t partition_ordering[BLOCK_MAX_PARTITIONINGS] ) { @@ -419,7 +419,7 @@ static unsigned int get_partition_ordering_by_mismatch_bits( uint16_t mscount[BLOCK_MAX_KMEANS_TEXELS] { 0 }; // Create the histogram of mismatch counts - for (unsigned int i = 0; i < partitioning_count; i++) + for (size_t i = 0; i < partitioning_count; i++) { mscount[mismatch_count[i]]++; } @@ -427,7 +427,7 @@ static unsigned int get_partition_ordering_by_mismatch_bits( // Create a running sum from the histogram array // Indices store previous values only; i.e. 
exclude self after sum uint16_t sum = 0; - for (unsigned int i = 0; i < texel_count; i++) + for (size_t i = 0; i < texel_count; i++) { uint16_t cnt = mscount[i]; mscount[i] = sum; @@ -436,9 +436,9 @@ static unsigned int get_partition_ordering_by_mismatch_bits( // Use the running sum as the index, incrementing after read to allow // sequential entries with the same count - for (unsigned int i = 0; i < partitioning_count; i++) + for (size_t i = 0; i < partitioning_count; i++) { - unsigned int idx = mscount[mismatch_count[i]]++; + size_t idx = mscount[mismatch_count[i]]++; partition_ordering[idx] = static_cast(i); } @@ -455,17 +455,17 @@ static unsigned int get_partition_ordering_by_mismatch_bits( * * @return The number of active partitionings in this selection. */ -static unsigned int compute_kmeans_partition_ordering( +static size_t compute_kmeans_partition_ordering( const block_size_descriptor& bsd, const image_block& blk, - unsigned int partition_count, + size_t partition_count, uint16_t partition_ordering[BLOCK_MAX_PARTITIONINGS] ) { vfloat4 cluster_centers[BLOCK_MAX_PARTITIONS]; uint8_t texel_partitions[BLOCK_MAX_TEXELS]; // Use three passes of k-means clustering to partition the block data - for (unsigned int i = 0; i < 3; i++) + for (size_t i = 0; i < 3; i++) { if (i == 0) { @@ -481,11 +481,11 @@ static unsigned int compute_kmeans_partition_ordering( // Construct the block bitmaps of texel assignments to each partition uint64_t bitmaps[BLOCK_MAX_PARTITIONS] { 0 }; - unsigned int texels_to_process = astc::min(bsd.texel_count, BLOCK_MAX_KMEANS_TEXELS); + size_t texels_to_process = astc::min(bsd.texel_count, BLOCK_MAX_KMEANS_TEXELS); promise(texels_to_process > 0); - for (unsigned int i = 0; i < texels_to_process; i++) + for (size_t i = 0; i < texels_to_process; i++) { - unsigned int idx = bsd.kmeans_texels[i]; + size_t idx = bsd.kmeans_texels[i]; bitmaps[texel_partitions[idx]] |= 1ULL << i; } @@ -510,11 +510,11 @@ static unsigned int compute_kmeans_partition_ordering( * @param[out] best_partitions The array of best partition values. */ static void insert_result( - unsigned int max_values, + size_t max_values, float this_error, - unsigned int this_partition, + size_t this_partition, float* best_errors, - unsigned int* best_partitions) + size_t* best_partitions) { promise(max_values > 0); @@ -525,7 +525,7 @@ static void insert_result( } // Else insert into the list in error-order - for (unsigned int i = 0; i < max_values; i++) + for (size_t i = 0; i < max_values; i++) { // Existing result is better - move on ... if (this_error > best_errors[i]) @@ -534,7 +534,7 @@ static void insert_result( } // Move existing results down one - for (unsigned int j = max_values - 1; j > i; j--) + for (size_t j = max_values - 1; j > i; j--) { best_errors[j] = best_errors[j - 1]; best_partitions[j] = best_partitions[j - 1]; @@ -548,17 +548,17 @@ static void insert_result( } /* See header for documentation. */ -unsigned int find_best_partition_candidates( +size_t find_best_partition_candidates( const block_size_descriptor& bsd, const image_block& blk, - unsigned int partition_count, - unsigned int partition_search_limit, - unsigned int best_partitions[TUNE_MAX_PARTITIONING_CANDIDATES], - unsigned int requested_candidates + size_t partition_count, + size_t partition_search_limit, + size_t best_partitions[TUNE_MAX_PARTITIONING_CANDIDATES], + size_t requested_candidates ) { // Constant used to estimate quantization error for a given partitioning; the optimal value for // this depends on bitrate. 
These values have been determined empirically. - unsigned int texels_per_block = bsd.texel_count; + size_t texels_per_block = bsd.texel_count; float weight_imprecision_estim = 0.055f; if (texels_per_block <= 20) { @@ -579,7 +579,7 @@ unsigned int find_best_partition_candidates( weight_imprecision_estim = weight_imprecision_estim * weight_imprecision_estim; uint16_t partition_sequence[BLOCK_MAX_PARTITIONINGS]; - unsigned int sequence_len = compute_kmeans_partition_ordering(bsd, blk, partition_count, partition_sequence); + size_t sequence_len = compute_kmeans_partition_ordering(bsd, blk, partition_count, partition_sequence); partition_search_limit = astc::min(partition_search_limit, sequence_len); requested_candidates = astc::min(partition_search_limit, requested_candidates); @@ -587,13 +587,13 @@ unsigned int find_best_partition_candidates( // Partitioning errors assuming uncorrelated-chrominance endpoints float uncor_best_errors[TUNE_MAX_PARTITIONING_CANDIDATES]; - unsigned int uncor_best_partitions[TUNE_MAX_PARTITIONING_CANDIDATES]; + size_t uncor_best_partitions[TUNE_MAX_PARTITIONING_CANDIDATES]; // Partitioning errors assuming same-chrominance endpoints float samec_best_errors[TUNE_MAX_PARTITIONING_CANDIDATES]; - unsigned int samec_best_partitions[TUNE_MAX_PARTITIONING_CANDIDATES]; + size_t samec_best_partitions[TUNE_MAX_PARTITIONING_CANDIDATES]; - for (unsigned int i = 0; i < requested_candidates; i++) + for (size_t i = 0; i < requested_candidates; i++) { uncor_best_errors[i] = ERROR_CALC_DEFAULT; samec_best_errors[i] = ERROR_CALC_DEFAULT; @@ -601,9 +601,9 @@ unsigned int find_best_partition_candidates( if (uses_alpha) { - for (unsigned int i = 0; i < partition_search_limit; i++) + for (size_t i = 0; i < partition_search_limit; i++) { - unsigned int partition = partition_sequence[i]; + size_t partition = partition_sequence[i]; const auto& pi = bsd.get_raw_partition_info(partition_count, partition); // Compute weighting to give to each component in each partition @@ -619,7 +619,7 @@ unsigned int find_best_partition_candidates( float line_lengths[BLOCK_MAX_PARTITIONS]; - for (unsigned int j = 0; j < partition_count; j++) + for (size_t j = 0; j < partition_count; j++) { partition_metrics& pm = pms[j]; @@ -657,7 +657,7 @@ unsigned int find_best_partition_candidates( // 4(optimized): square the vector once, then do a dot-product with the average // texel error, then multiply by the number of texels. - for (unsigned int j = 0; j < partition_count; j++) + for (size_t j = 0; j < partition_count; j++) { float tpp = static_cast(pi.partition_texel_count[j]); vfloat4 error_weights(tpp * weight_imprecision_estim); @@ -675,9 +675,9 @@ unsigned int find_best_partition_candidates( } else { - for (unsigned int i = 0; i < partition_search_limit; i++) + for (size_t i = 0; i < partition_search_limit; i++) { - unsigned int partition = partition_sequence[i]; + size_t partition = partition_sequence[i]; const auto& pi = bsd.get_raw_partition_info(partition_count, partition); // Compute weighting to give to each component in each partition @@ -686,7 +686,7 @@ unsigned int find_best_partition_candidates( partition_lines3 plines[BLOCK_MAX_PARTITIONS]; - for (unsigned int j = 0; j < partition_count; j++) + for (size_t j = 0; j < partition_count; j++) { partition_metrics& pm = pms[j]; partition_lines3& pl = plines[j]; @@ -723,7 +723,7 @@ unsigned int find_best_partition_candidates( // 4(optimized): square the vector once, then do a dot-product with the average // texel error, then multiply by the number of texels. 
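// get_partition_ordering_by_mismatch_bits above is a counting sort: histogram the mismatch
// counts, convert the histogram to an exclusive prefix sum, then scatter the candidate
// indices so the fewest-mismatch partitionings come first. A standalone sketch of the same
// scheme, assuming every mismatch value is below 64 (matching the BLOCK_MAX_KMEANS_TEXELS
// asserts in the original); names are illustrative only:
#include <cstddef>
#include <cstdint>

static void order_by_mismatch(
	size_t count,             // number of candidate partitionings
	const uint8_t* mismatch,  // mismatch count per candidate, each < 64
	uint16_t* ordering)       // output: candidate indices, fewest mismatches first
{
	uint16_t histogram[64] { 0 };
	for (size_t i = 0; i < count; i++)
	{
		histogram[mismatch[i]]++;
	}

	// Exclusive prefix sum: histogram[v] becomes the first output slot for value v
	uint16_t sum = 0;
	for (size_t i = 0; i < 64; i++)
	{
		uint16_t cnt = histogram[i];
		histogram[i] = sum;
		sum += cnt;
	}

	// Stable scatter: candidates with equal mismatch counts keep their relative order
	for (size_t i = 0; i < count; i++)
	{
		ordering[histogram[mismatch[i]]++] = static_cast<uint16_t>(i);
	}
}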
- for (unsigned int j = 0; j < partition_count; j++) + for (size_t j = 0; j < partition_count; j++) { partition_lines3& pl = plines[j]; @@ -742,23 +742,23 @@ unsigned int find_best_partition_candidates( } } - unsigned int interleave[2 * TUNE_MAX_PARTITIONING_CANDIDATES]; - for (unsigned int i = 0; i < requested_candidates; i++) + size_t interleave[2 * TUNE_MAX_PARTITIONING_CANDIDATES]; + for (size_t i = 0; i < requested_candidates; i++) { interleave[2 * i] = bsd.get_raw_partition_info(partition_count, uncor_best_partitions[i]).partition_index; interleave[2 * i + 1] = bsd.get_raw_partition_info(partition_count, samec_best_partitions[i]).partition_index; } uint64_t bitmasks[1024/64] { 0 }; - unsigned int emitted = 0; + size_t emitted = 0; // Deduplicate the first "requested" entries - for (unsigned int i = 0; i < requested_candidates * 2; i++) + for (size_t i = 0; i < requested_candidates * 2; i++) { - unsigned int partition = interleave[i]; + size_t partition = interleave[i]; - unsigned int word = partition / 64; - unsigned int bit = partition % 64; + size_t word = partition / 64; + size_t bit = partition % 64; bool written = bitmasks[word] & (1ull << bit); diff --git a/Source/astcenc_ideal_endpoints_and_weights.cpp b/Source/astcenc_ideal_endpoints_and_weights.cpp index 8e6ee2f4..fa3347f6 100644 --- a/Source/astcenc_ideal_endpoints_and_weights.cpp +++ b/Source/astcenc_ideal_endpoints_and_weights.cpp @@ -38,7 +38,7 @@ static vfloat bilinear_infill_vla( const decimation_info& di, const float* weights, - unsigned int index + size_t index ) { // Load the bilinear filter texel weight indexes in the decimated grid const uint8_t* weight_idx0 = di.texel_weights_tr[0] + index; @@ -78,7 +78,7 @@ static vfloat bilinear_infill_vla( static vfloat bilinear_infill_vla_2( const decimation_info& di, const float* weights, - unsigned int index + size_t index ) { // Load the bilinear filter texel weight indexes in the decimated grid const uint8_t* weight_idx0 = di.texel_weights_tr[0] + index; @@ -108,13 +108,13 @@ static void compute_ideal_colors_and_weights_1_comp( const image_block& blk, const partition_info& pi, endpoints_and_weights& ei, - unsigned int component + size_t component ) { - unsigned int partition_count = pi.partition_count; + size_t partition_count = pi.partition_count; ei.ep.partition_count = partition_count; promise(partition_count > 0); - unsigned int texel_count = blk.texel_count; + size_t texel_count = blk.texel_count; promise(texel_count > 0); float error_weight; @@ -146,15 +146,15 @@ static void compute_ideal_colors_and_weights_1_comp( bool is_constant_wes { true }; float partition0_len_sq { 0.0f }; - for (unsigned int i = 0; i < partition_count; i++) + for (size_t i = 0; i < partition_count; i++) { float lowvalue { 1e10f }; float highvalue { -1e10f }; - unsigned int partition_texel_count = pi.partition_texel_count[i]; - for (unsigned int j = 0; j < partition_texel_count; j++) + size_t partition_texel_count = pi.partition_texel_count[i]; + for (size_t j = 0; j < partition_texel_count; j++) { - unsigned int tix = pi.texels_of_partition[i][j]; + size_t tix = pi.texels_of_partition[i][j]; float value = data_vr[tix]; lowvalue = astc::min(value, lowvalue); highvalue = astc::max(value, highvalue); @@ -179,9 +179,9 @@ static void compute_ideal_colors_and_weights_1_comp( is_constant_wes = is_constant_wes && length_squared == partition0_len_sq; } - for (unsigned int j = 0; j < partition_texel_count; j++) + for (size_t j = 0; j < partition_texel_count; j++) { - unsigned int tix = 
pi.texels_of_partition[i][j]; + size_t tix = pi.texels_of_partition[i][j]; float value = (data_vr[tix] - lowvalue) * scale; value = astc::clamp1f(value); @@ -195,8 +195,8 @@ static void compute_ideal_colors_and_weights_1_comp( } // Zero initialize any SIMD over-fetch - unsigned int texel_count_simd = round_up_to_simd_multiple_vla(texel_count); - for (unsigned int i = texel_count; i < texel_count_simd; i++) + size_t texel_count_simd = round_up_to_simd_multiple_vla(texel_count); + for (size_t i = texel_count; i < texel_count_simd; i++) { ei.weights[i] = 0.0f; ei.weight_error_scale[i] = 0.0f; @@ -221,11 +221,11 @@ static void compute_ideal_colors_and_weights_2_comp( int component1, int component2 ) { - unsigned int partition_count = pi.partition_count; + size_t partition_count = pi.partition_count; ei.ep.partition_count = partition_count; promise(partition_count > 0); - unsigned int texel_count = blk.texel_count; + size_t texel_count = blk.texel_count; promise(texel_count > 0); partition_metrics pms[BLOCK_MAX_PARTITIONS]; @@ -266,7 +266,7 @@ static void compute_ideal_colors_and_weights_2_comp( vmask4 comp1_mask = vint4::lane_id() == vint4(component1); vmask4 comp2_mask = vint4::lane_id() == vint4(component2); - for (unsigned int i = 0; i < partition_count; i++) + for (size_t i = 0; i < partition_count; i++) { vfloat4 dir = pms[i].dir; if (hadd_s(dir) < 0.0f) @@ -278,10 +278,10 @@ static void compute_ideal_colors_and_weights_2_comp( float lowparam { 1e10f }; float highparam { -1e10f }; - unsigned int partition_texel_count = pi.partition_texel_count[i]; - for (unsigned int j = 0; j < partition_texel_count; j++) + size_t partition_texel_count = pi.partition_texel_count[i]; + for (size_t j = 0; j < partition_texel_count; j++) { - unsigned int tix = pi.texels_of_partition[i][j]; + size_t tix = pi.texels_of_partition[i][j]; vfloat4 point = vfloat2(data_vr[tix], data_vg[tix]); float param = dot_s(point - line.a, line.b); ei.weights[tix] = param; @@ -311,9 +311,9 @@ static void compute_ideal_colors_and_weights_2_comp( is_constant_wes = is_constant_wes && length_squared == partition0_len_sq; } - for (unsigned int j = 0; j < partition_texel_count; j++) + for (size_t j = 0; j < partition_texel_count; j++) { - unsigned int tix = pi.texels_of_partition[i][j]; + size_t tix = pi.texels_of_partition[i][j]; float idx = (ei.weights[tix] - lowparam) * scale; idx = astc::clamp1f(idx); @@ -333,8 +333,8 @@ static void compute_ideal_colors_and_weights_2_comp( } // Zero initialize any SIMD over-fetch - unsigned int texel_count_simd = round_up_to_simd_multiple_vla(texel_count); - for (unsigned int i = texel_count; i < texel_count_simd; i++) + size_t texel_count_simd = round_up_to_simd_multiple_vla(texel_count); + for (size_t i = texel_count; i < texel_count_simd; i++) { ei.weights[i] = 0.0f; ei.weight_error_scale[i] = 0.0f; @@ -355,13 +355,13 @@ static void compute_ideal_colors_and_weights_3_comp( const image_block& blk, const partition_info& pi, endpoints_and_weights& ei, - unsigned int omitted_component + size_t omitted_component ) { - unsigned int partition_count = pi.partition_count; + size_t partition_count = pi.partition_count; ei.ep.partition_count = partition_count; promise(partition_count > 0); - unsigned int texel_count = blk.texel_count; + size_t texel_count = blk.texel_count; promise(texel_count > 0); partition_metrics pms[BLOCK_MAX_PARTITIONS]; @@ -415,7 +415,7 @@ static void compute_ideal_colors_and_weights_3_comp( bool is_constant_wes { true }; float partition0_len_sq { 0.0f }; - for (unsigned int i = 
0; i < partition_count; i++) + for (size_t i = 0; i < partition_count; i++) { vfloat4 dir = pms[i].dir; if (hadd_rgb_s(dir) < 0.0f) @@ -427,10 +427,10 @@ static void compute_ideal_colors_and_weights_3_comp( float lowparam { 1e10f }; float highparam { -1e10f }; - unsigned int partition_texel_count = pi.partition_texel_count[i]; - for (unsigned int j = 0; j < partition_texel_count; j++) + size_t partition_texel_count = pi.partition_texel_count[i]; + for (size_t j = 0; j < partition_texel_count; j++) { - unsigned int tix = pi.texels_of_partition[i][j]; + size_t tix = pi.texels_of_partition[i][j]; vfloat4 point = vfloat3(data_vr[tix], data_vg[tix], data_vb[tix]); float param = dot3_s(point - line.a, line.b); ei.weights[tix] = param; @@ -460,9 +460,9 @@ static void compute_ideal_colors_and_weights_3_comp( is_constant_wes = is_constant_wes && length_squared == partition0_len_sq; } - for (unsigned int j = 0; j < partition_texel_count; j++) + for (size_t j = 0; j < partition_texel_count; j++) { - unsigned int tix = pi.texels_of_partition[i][j]; + size_t tix = pi.texels_of_partition[i][j]; float idx = (ei.weights[tix] - lowparam) * scale; idx = astc::clamp1f(idx); @@ -500,8 +500,8 @@ static void compute_ideal_colors_and_weights_3_comp( } // Zero initialize any SIMD over-fetch - unsigned int texel_count_simd = round_up_to_simd_multiple_vla(texel_count); - for (unsigned int i = texel_count; i < texel_count_simd; i++) + size_t texel_count_simd = round_up_to_simd_multiple_vla(texel_count); + for (size_t i = texel_count; i < texel_count_simd; i++) { ei.weights[i] = 0.0f; ei.weight_error_scale[i] = 0.0f; @@ -524,9 +524,9 @@ static void compute_ideal_colors_and_weights_4_comp( ) { const float error_weight = hadd_s(blk.channel_weight) / 4.0f; - unsigned int partition_count = pi.partition_count; + size_t partition_count = pi.partition_count; - unsigned int texel_count = blk.texel_count; + size_t texel_count = blk.texel_count; promise(texel_count > 0); promise(partition_count > 0); @@ -537,7 +537,7 @@ static void compute_ideal_colors_and_weights_4_comp( bool is_constant_wes { true }; float partition0_len_sq { 0.0f }; - for (unsigned int i = 0; i < partition_count; i++) + for (size_t i = 0; i < partition_count; i++) { vfloat4 dir = pms[i].dir; if (hadd_rgb_s(dir) < 0.0f) @@ -549,10 +549,10 @@ static void compute_ideal_colors_and_weights_4_comp( float lowparam { 1e10f }; float highparam { -1e10f }; - unsigned int partition_texel_count = pi.partition_texel_count[i]; - for (unsigned int j = 0; j < partition_texel_count; j++) + size_t partition_texel_count = pi.partition_texel_count[i]; + for (size_t j = 0; j < partition_texel_count; j++) { - unsigned int tix = pi.texels_of_partition[i][j]; + size_t tix = pi.texels_of_partition[i][j]; vfloat4 point = blk.texel(tix); float param = dot_s(point - line.a, line.b); ei.weights[tix] = param; @@ -585,9 +585,9 @@ static void compute_ideal_colors_and_weights_4_comp( ei.ep.endpt0[i] = line.a + line.b * lowparam; ei.ep.endpt1[i] = line.a + line.b * highparam; - for (unsigned int j = 0; j < partition_texel_count; j++) + for (size_t j = 0; j < partition_texel_count; j++) { - unsigned int tix = pi.texels_of_partition[i][j]; + size_t tix = pi.texels_of_partition[i][j]; float idx = (ei.weights[tix] - lowparam) * scale; idx = astc::clamp1f(idx); @@ -598,8 +598,8 @@ static void compute_ideal_colors_and_weights_4_comp( } // Zero initialize any SIMD over-fetch - unsigned int texel_count_simd = round_up_to_simd_multiple_vla(texel_count); - for (unsigned int i = texel_count; i < 
texel_count_simd; i++) + size_t texel_count_simd = round_up_to_simd_multiple_vla(texel_count); + for (size_t i = texel_count; i < texel_count_simd; i++) { ei.weights[i] = 0.0f; ei.weight_error_scale[i] = 0.0f; @@ -630,7 +630,7 @@ void compute_ideal_colors_and_weights_1plane( void compute_ideal_colors_and_weights_2planes( const block_size_descriptor& bsd, const image_block& blk, - unsigned int plane2_component, + size_t plane2_component, endpoints_and_weights& ei1, endpoints_and_weights& ei2 ) { @@ -691,13 +691,13 @@ float compute_error_of_weight_set_1plane( const float* dec_weight_quant_uvalue ) { vfloatacc error_summav = vfloatacc::zero(); - unsigned int texel_count = di.texel_count; + size_t texel_count = di.texel_count; promise(texel_count > 0); // Process SIMD-width chunks, safe to over-fetch - the extra space is zero initialized if (di.max_texel_weight_count > 2) { - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { // Compute the bilinear interpolation of the decimated weight grid vfloat current_values = bilinear_infill_vla(di, dec_weight_quant_uvalue, i); @@ -713,7 +713,7 @@ float compute_error_of_weight_set_1plane( } else if (di.max_texel_weight_count > 1) { - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { // Compute the bilinear interpolation of the decimated weight grid vfloat current_values = bilinear_infill_vla_2(di, dec_weight_quant_uvalue, i); @@ -729,7 +729,7 @@ float compute_error_of_weight_set_1plane( } else { - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { // Load the weight set directly, without interpolation vfloat current_values = loada(dec_weight_quant_uvalue + i); @@ -757,13 +757,13 @@ float compute_error_of_weight_set_2planes( const float* dec_weight_quant_uvalue_plane2 ) { vfloatacc error_summav = vfloatacc::zero(); - unsigned int texel_count = di.texel_count; + size_t texel_count = di.texel_count; promise(texel_count > 0); // Process SIMD-width chunks, safe to over-fetch - the extra space is zero initialized if (di.max_texel_weight_count > 2) { - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { // Plane 1 // Compute the bilinear interpolation of the decimated weight grid @@ -788,7 +788,7 @@ float compute_error_of_weight_set_2planes( } else if (di.max_texel_weight_count > 1) { - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { // Plane 1 // Compute the bilinear interpolation of the decimated weight grid @@ -813,7 +813,7 @@ float compute_error_of_weight_set_2planes( } else { - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { // Plane 1 // Load the weight set directly, without interpolation @@ -847,8 +847,8 @@ void compute_ideal_weights_for_decimation( const decimation_info& di, float* dec_weight_ideal_value ) { - unsigned int texel_count = di.texel_count; - unsigned int weight_count = di.weight_count; + size_t texel_count = di.texel_count; + size_t weight_count = di.weight_count; bool is_direct = texel_count == weight_count; promise(texel_count > 0); promise(weight_count > 0); @@ -857,7 +857,7 @@ void compute_ideal_weights_for_decimation( // zero-initialized SIMD over-fetch 
region if (is_direct) { - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { vfloat weight(ei.weights + i); storea(weight, dec_weight_ideal_value + i); @@ -875,7 +875,7 @@ void compute_ideal_weights_for_decimation( // This overshoots - this is OK as we initialize the array tails in the // decimation table structures to safe values ... - for (unsigned int i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH) { // Start with a small value to avoid div-by-zero later vfloat weight_weight(1e-10f); @@ -883,10 +883,10 @@ void compute_ideal_weights_for_decimation( // Accumulate error weighting of all the texels using this weight vint weight_texel_count(di.weight_texel_count + i); - unsigned int max_texel_count = hmax_s(weight_texel_count); + size_t max_texel_count = hmax_s(weight_texel_count); promise(max_texel_count > 0); - for (unsigned int j = 0; j < max_texel_count; j++) + for (size_t j = 0; j < max_texel_count; j++) { const uint8_t* texel = di.weight_texels_tr[j] + i; vfloat weight = loada(di.weights_texel_contribs_tr[j] + i); @@ -910,7 +910,7 @@ void compute_ideal_weights_for_decimation( // over-process full SIMD vectors - the tail is zeroed. if (di.max_texel_weight_count <= 2) { - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { vfloat weight = bilinear_infill_vla_2(di, dec_weight_ideal_value, i); storea(weight, infilled_weights + i); @@ -918,7 +918,7 @@ void compute_ideal_weights_for_decimation( } else { - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { vfloat weight = bilinear_infill_vla(di, dec_weight_ideal_value, i); storea(weight, infilled_weights + i); @@ -930,7 +930,7 @@ void compute_ideal_weights_for_decimation( constexpr float stepsize = 0.25f; constexpr float chd_scale = -WEIGHTS_TEXEL_SUM; - for (unsigned int i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH) { vfloat weight_val = loada(dec_weight_ideal_value + i); @@ -941,10 +941,10 @@ void compute_ideal_weights_for_decimation( // Accumulate error weighting of all the texels using this weight vint weight_texel_count(di.weight_texel_count + i); - unsigned int max_texel_count = hmax_s(weight_texel_count); + size_t max_texel_count = hmax_s(weight_texel_count); promise(max_texel_count > 0); - for (unsigned int j = 0; j < max_texel_count; j++) + for (size_t j = 0; j < max_texel_count; j++) { const uint8_t* texel = di.weight_texels_tr[j] + i; vfloat contrib_weight = loada(di.weights_texel_contribs_tr[j] + i); @@ -1152,16 +1152,16 @@ void recompute_ideal_colors_1plane( vfloat4 rgbs_vectors[BLOCK_MAX_PARTITIONS], vfloat4 rgbo_vectors[BLOCK_MAX_PARTITIONS] ) { - unsigned int weight_count = di.weight_count; - unsigned int total_texel_count = blk.texel_count; - unsigned int partition_count = pi.partition_count; + size_t weight_count = di.weight_count; + size_t total_texel_count = blk.texel_count; + size_t partition_count = pi.partition_count; promise(weight_count > 0); promise(total_texel_count > 0); promise(partition_count > 0); ASTCENC_ALIGNAS float dec_weight[BLOCK_MAX_WEIGHTS]; - for (unsigned int i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH) { vint unquant_value(dec_weights_uquant + i); vfloat 
unquant_valuef = int_to_float(unquant_value) * vfloat(1.0f / 64.0f); @@ -1176,7 +1176,7 @@ void recompute_ideal_colors_1plane( } else if (di.max_texel_weight_count <= 2) { - for (unsigned int i = 0; i < total_texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < total_texel_count; i += ASTCENC_SIMD_WIDTH) { vfloat weight = bilinear_infill_vla_2(di, dec_weight, i); storea(weight, undec_weight + i); @@ -1186,7 +1186,7 @@ void recompute_ideal_colors_1plane( } else { - for (unsigned int i = 0; i < total_texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < total_texel_count; i += ASTCENC_SIMD_WIDTH) { vfloat weight = bilinear_infill_vla(di, dec_weight, i); storea(weight, undec_weight + i); @@ -1197,9 +1197,9 @@ void recompute_ideal_colors_1plane( vfloat4 rgba_sum(blk.data_mean * static_cast(blk.texel_count)); - for (unsigned int i = 0; i < partition_count; i++) + for (size_t i = 0; i < partition_count; i++) { - unsigned int texel_count = pi.partition_texel_count[i]; + size_t texel_count = pi.partition_texel_count[i]; const uint8_t *texel_indexes = pi.texels_of_partition[i]; // Only compute a partition mean if more than one partition @@ -1207,9 +1207,9 @@ void recompute_ideal_colors_1plane( { rgba_sum = vfloat4::zero(); promise(texel_count > 0); - for (unsigned int j = 0; j < texel_count; j++) + for (size_t j = 0; j < texel_count; j++) { - unsigned int tix = texel_indexes[j]; + size_t tix = texel_indexes[j]; rgba_sum += blk.texel(tix); } } @@ -1238,9 +1238,9 @@ void recompute_ideal_colors_1plane( vfloat4 color_weight = blk.channel_weight; float ls_weight = hadd_rgb_s(color_weight); - for (unsigned int j = 0; j < texel_count; j++) + for (size_t j = 0; j < texel_count; j++) { - unsigned int tix = texel_indexes[j]; + size_t tix = texel_indexes[j]; vfloat4 rgba = blk.texel(tix); float idx0 = undec_weight_ref[tix]; @@ -1377,8 +1377,8 @@ void recompute_ideal_colors_2planes( vfloat4& rgbo_vector, int plane2_component ) { - unsigned int weight_count = di.weight_count; - unsigned int total_texel_count = blk.texel_count; + size_t weight_count = di.weight_count; + size_t total_texel_count = blk.texel_count; promise(total_texel_count > 0); promise(weight_count > 0); @@ -1388,7 +1388,7 @@ void recompute_ideal_colors_2planes( assert(weight_count <= BLOCK_MAX_WEIGHTS_2PLANE); - for (unsigned int i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH) { vint unquant_value1(dec_weights_uquant_plane1 + i); vfloat unquant_value1f = int_to_float(unquant_value1) * vfloat(1.0f / 64.0f); @@ -1412,7 +1412,7 @@ void recompute_ideal_colors_2planes( } else if (di.max_texel_weight_count <= 2) { - for (unsigned int i = 0; i < total_texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < total_texel_count; i += ASTCENC_SIMD_WIDTH) { vfloat weight = bilinear_infill_vla_2(di, dec_weight_plane1, i); storea(weight, undec_weight_plane1 + i); @@ -1426,7 +1426,7 @@ void recompute_ideal_colors_2planes( } else { - for (unsigned int i = 0; i < total_texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < total_texel_count; i += ASTCENC_SIMD_WIDTH) { vfloat weight = bilinear_infill_vla(di, dec_weight_plane1, i); storea(weight, undec_weight_plane1 + i); @@ -1439,7 +1439,7 @@ void recompute_ideal_colors_2planes( undec_weight_plane2_ref = undec_weight_plane2; } - unsigned int texel_count = bsd.texel_count; + size_t texel_count = bsd.texel_count; vfloat4 rgba_weight_sum = max(blk.channel_weight * static_cast(texel_count), 1e-17f); vfloat4 scale_dir = 
normalize(blk.data_mean.swz<0, 1, 2>()); @@ -1471,7 +1471,7 @@ void recompute_ideal_colors_2planes( vfloat4 color_weight = blk.channel_weight; float ls_weight = hadd_rgb_s(color_weight); - for (unsigned int j = 0; j < texel_count; j++) + for (size_t j = 0; j < texel_count; j++) { vfloat4 rgba = blk.texel(j); diff --git a/Source/astcenc_image.cpp b/Source/astcenc_image.cpp index 079f69f1..2c73c058 100644 --- a/Source/astcenc_image.cpp +++ b/Source/astcenc_image.cpp @@ -153,14 +153,14 @@ void load_image_block( const astcenc_image& img, image_block& blk, const block_size_descriptor& bsd, - unsigned int xpos, - unsigned int ypos, - unsigned int zpos, + size_t xpos, + size_t ypos, + size_t zpos, const astcenc_swizzle& swz ) { - unsigned int xsize = img.dim_x; - unsigned int ysize = img.dim_y; - unsigned int zsize = img.dim_z; + size_t xsize = img.dim_x; + size_t ysize = img.dim_y; + size_t zsize = img.dim_z; blk.xpos = xpos; blk.ypos = ypos; @@ -208,18 +208,18 @@ void load_image_block( converter = encode_texel_lns; } - for (unsigned int z = 0; z < bsd.zdim; z++) + for (size_t z = 0; z < bsd.zdim; z++) { - unsigned int zi = astc::min(zpos + z, zsize - 1); + size_t zi = astc::min(zpos + z, zsize - 1); void* plane = img.data[zi]; - for (unsigned int y = 0; y < bsd.ydim; y++) + for (size_t y = 0; y < bsd.ydim; y++) { - unsigned int yi = astc::min(ypos + y, ysize - 1); + size_t yi = astc::min(ypos + y, ysize - 1); - for (unsigned int x = 0; x < bsd.xdim; x++) + for (size_t x = 0; x < bsd.xdim; x++) { - unsigned int xi = astc::min(xpos + x, xsize - 1); + size_t xi = astc::min(xpos + x, xsize - 1); vfloat4 datav = loader(plane, (4 * xsize * yi) + (4 * xi)); datav = swizzler(datav, swz); @@ -270,16 +270,16 @@ void load_image_block_fast_ldr( const astcenc_image& img, image_block& blk, const block_size_descriptor& bsd, - unsigned int xpos, - unsigned int ypos, - unsigned int zpos, + size_t xpos, + size_t ypos, + size_t zpos, const astcenc_swizzle& swz ) { (void)swz; (void)decode_mode; - unsigned int xsize = img.dim_x; - unsigned int ysize = img.dim_y; + size_t xsize = img.dim_x; + size_t ysize = img.dim_y; blk.xpos = xpos; blk.ypos = ypos; @@ -292,13 +292,13 @@ void load_image_block_fast_ldr( int idx = 0; const uint8_t* plane = static_cast(img.data[0]); - for (unsigned int y = ypos; y < ypos + bsd.ydim; y++) + for (size_t y = ypos; y < ypos + bsd.ydim; y++) { - unsigned int yi = astc::min(y, ysize - 1); + size_t yi = astc::min(y, ysize - 1); - for (unsigned int x = xpos; x < xpos + bsd.xdim; x++) + for (size_t x = xpos; x < xpos + bsd.xdim; x++) { - unsigned int xi = astc::min(x, xsize - 1); + size_t xi = astc::min(x, xsize - 1); vint4 datavi = vint4(plane + (4 * xsize * yi) + (4 * xi)); vfloat4 datav = int_to_float(datavi) * (65535.0f / 255.0f); @@ -336,26 +336,26 @@ void store_image_block( astcenc_image& img, const image_block& blk, const block_size_descriptor& bsd, - unsigned int xpos, - unsigned int ypos, - unsigned int zpos, + size_t xpos, + size_t ypos, + size_t zpos, const astcenc_swizzle& swz ) { - unsigned int x_size = img.dim_x; - unsigned int x_start = xpos; - unsigned int x_end = astc::min(x_size, xpos + bsd.xdim); - unsigned int x_count = x_end - x_start; - unsigned int x_nudge = bsd.xdim - x_count; - - unsigned int y_size = img.dim_y; - unsigned int y_start = ypos; - unsigned int y_end = astc::min(y_size, ypos + bsd.ydim); - unsigned int y_count = y_end - y_start; - unsigned int y_nudge = (bsd.ydim - y_count) * bsd.xdim; - - unsigned int z_size = img.dim_z; - unsigned int z_start = zpos; - 
unsigned int z_end = astc::min(z_size, zpos + bsd.zdim); + size_t x_size = img.dim_x; + size_t x_start = xpos; + size_t x_end = astc::min(x_size, xpos + bsd.xdim); + size_t x_count = x_end - x_start; + size_t x_nudge = bsd.xdim - x_count; + + size_t y_size = img.dim_y; + size_t y_start = ypos; + size_t y_end = astc::min(y_size, ypos + bsd.ydim); + size_t y_count = y_end - y_start; + size_t y_nudge = (bsd.ydim - y_count) * bsd.xdim; + + size_t z_size = img.dim_z; + size_t z_start = zpos; + size_t z_end = astc::min(z_size, zpos + bsd.zdim); // True if any non-identity swizzle bool needs_swz = (swz.r != ASTCENC_SWZ_R) || (swz.g != ASTCENC_SWZ_G) || @@ -368,19 +368,19 @@ void store_image_block( int idx = 0; if (img.data_type == ASTCENC_TYPE_U8) { - for (unsigned int z = z_start; z < z_end; z++) + for (size_t z = z_start; z < z_end; z++) { // Fetch the image plane uint8_t* data8 = static_cast(img.data[z]); - for (unsigned int y = y_start; y < y_end; y++) + for (size_t y = y_start; y < y_end; y++) { uint8_t* data8_row = data8 + (4 * x_size * y) + (4 * x_start); - for (unsigned int x = 0; x < x_count; x += ASTCENC_SIMD_WIDTH) + for (size_t x = 0; x < x_count; x += ASTCENC_SIMD_WIDTH) { - unsigned int max_texels = ASTCENC_SIMD_WIDTH; - unsigned int used_texels = astc::min(x_count - x, max_texels); + size_t max_texels = ASTCENC_SIMD_WIDTH; + size_t used_texels = astc::min(x_count - x, max_texels); // Unaligned load as rows are not always SIMD_WIDTH long vfloat data_r(blk.data_r + idx); @@ -445,16 +445,16 @@ void store_image_block( } else if (img.data_type == ASTCENC_TYPE_F16) { - for (unsigned int z = z_start; z < z_end; z++) + for (size_t z = z_start; z < z_end; z++) { // Fetch the image plane uint16_t* data16 = static_cast(img.data[z]); - for (unsigned int y = y_start; y < y_end; y++) + for (size_t y = y_start; y < y_end; y++) { uint16_t* data16_row = data16 + (4 * x_size * y) + (4 * x_start); - for (unsigned int x = 0; x < x_count; x++) + for (size_t x = 0; x < x_count; x++) { vint4 color; @@ -507,16 +507,16 @@ void store_image_block( { assert(img.data_type == ASTCENC_TYPE_F32); - for (unsigned int z = z_start; z < z_end; z++) + for (size_t z = z_start; z < z_end; z++) { // Fetch the image plane float* data32 = static_cast(img.data[z]); - for (unsigned int y = y_start; y < y_end; y++) + for (size_t y = y_start; y < y_end; y++) { float* data32_row = data32 + (4 * x_size * y) + (4 * x_start); - for (unsigned int x = 0; x < x_count; x++) + for (size_t x = 0; x < x_count; x++) { vfloat4 color = blk.texel(idx); diff --git a/Source/astcenc_integer_sequence.cpp b/Source/astcenc_integer_sequence.cpp index 41dc38b7..ab776979 100644 --- a/Source/astcenc_integer_sequence.cpp +++ b/Source/astcenc_integer_sequence.cpp @@ -416,8 +416,8 @@ static const std::array ise_sizes {{ }}; /* See header for documentation. */ -unsigned int get_ise_sequence_bitcount( - unsigned int character_count, +size_t get_ise_sequence_bitcount( + size_t character_count, quant_method quant_level ) { // Cope with out-of bounds values - input might be invalid @@ -428,7 +428,7 @@ unsigned int get_ise_sequence_bitcount( } auto& entry = ise_sizes[quant_level]; - unsigned int divisor = (entry.divisor << 1) + 1; + size_t divisor = (entry.divisor << 1) + 1; return (entry.scale * character_count + divisor - 1) / divisor; } @@ -444,12 +444,12 @@ unsigned int get_ise_sequence_bitcount( * @param[in,out] ptr The data pointer to write to. 
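The x_count/x_nudge bookkeeping in store_image_block above clips the block footprint against the image edge: only x_count of the block's xdim columns are written back, and x_nudge is how far the block-local index must skip to stay in step for the next row. A minimal standalone sketch of that arithmetic follows; the struct and function names are illustrative, not library helpers.

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdio>

// Clip a block footprint [pos, pos + block_dim) against an image axis of
// length 'size'. 'count' is how many texels are written back and 'nudge'
// is the unused block-local tail that must be skipped per row.
struct axis_clip { size_t start, count, nudge; };

static axis_clip clip_axis(size_t pos, size_t block_dim, size_t size)
{
    assert(pos < size);
    size_t end = std::min(size, pos + block_dim);
    return { pos, end - pos, block_dim - (end - pos) };
}

int main()
{
    // 12 texel wide image, 8 texel wide blocks: the second block column
    // writes 4 texels and skips 4 block-local texels per row.
    axis_clip c = clip_axis(8, 8, 12);
    std::printf("count %zu nudge %zu\n", c.count, c.nudge); // count 4 nudge 4
    return 0;
}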
*/ static inline void write_bits( - unsigned int value, - unsigned int bitcount, - unsigned int bitoffset, + size_t value, + size_t bitcount, + size_t bitoffset, uint8_t ptr[2] ) { - unsigned int mask = (1 << bitcount) - 1; + size_t mask = (1 << bitcount) - 1; value &= mask; ptr += bitoffset >> 3; bitoffset &= 7; @@ -475,15 +475,15 @@ static inline void write_bits( * * @return The read value. */ -static inline unsigned int read_bits( - unsigned int bitcount, - unsigned int bitoffset, +static inline size_t read_bits( + size_t bitcount, + size_t bitoffset, const uint8_t* ptr ) { - unsigned int mask = (1 << bitcount) - 1; + size_t mask = (1 << bitcount) - 1; ptr += bitoffset >> 3; bitoffset &= 7; - unsigned int value = ptr[0] | (ptr[1] << 8); + size_t value = ptr[0] | (ptr[1] << 8); value >>= bitoffset; value &= mask; return value; @@ -492,31 +492,31 @@ static inline unsigned int read_bits( /* See header for documentation. */ void encode_ise( quant_method quant_level, - unsigned int character_count, + size_t character_count, const uint8_t* input_data, uint8_t* output_data, - unsigned int bit_offset + size_t bit_offset ) { promise(character_count > 0); - unsigned int bits = btq_counts[quant_level].bits; - unsigned int trits = btq_counts[quant_level].trits; - unsigned int quints = btq_counts[quant_level].quints; - unsigned int mask = (1 << bits) - 1; + size_t bits = btq_counts[quant_level].bits; + size_t trits = btq_counts[quant_level].trits; + size_t quints = btq_counts[quant_level].quints; + size_t mask = (1 << bits) - 1; // Write out trits and bits if (trits) { - unsigned int i = 0; - unsigned int full_trit_blocks = character_count / 5; + size_t i = 0; + size_t full_trit_blocks = character_count / 5; - for (unsigned int j = 0; j < full_trit_blocks; j++) + for (size_t j = 0; j < full_trit_blocks; j++) { - unsigned int i4 = input_data[i + 4] >> bits; - unsigned int i3 = input_data[i + 3] >> bits; - unsigned int i2 = input_data[i + 2] >> bits; - unsigned int i1 = input_data[i + 1] >> bits; - unsigned int i0 = input_data[i + 0] >> bits; + size_t i4 = input_data[i + 4] >> bits; + size_t i3 = input_data[i + 3] >> bits; + size_t i2 = input_data[i + 2] >> bits; + size_t i1 = input_data[i + 1] >> bits; + size_t i0 = input_data[i + 0] >> bits; uint8_t T = integer_of_trits[i4][i3][i2][i1][i0]; @@ -555,15 +555,15 @@ void encode_ise( { // i4 cannot be present - we know the block is partial // i0 must be present - we know the block isn't empty - unsigned int i4 = 0; - unsigned int i3 = i + 3 >= character_count ? 0 : input_data[i + 3] >> bits; - unsigned int i2 = i + 2 >= character_count ? 0 : input_data[i + 2] >> bits; - unsigned int i1 = i + 1 >= character_count ? 0 : input_data[i + 1] >> bits; - unsigned int i0 = input_data[i + 0] >> bits; + size_t i4 = 0; + size_t i3 = i + 3 >= character_count ? 0 : input_data[i + 3] >> bits; + size_t i2 = i + 2 >= character_count ? 0 : input_data[i + 2] >> bits; + size_t i1 = i + 1 >= character_count ? 
0 : input_data[i + 1] >> bits; + size_t i0 = input_data[i + 0] >> bits; uint8_t T = integer_of_trits[i4][i3][i2][i1][i0]; - for (unsigned int j = 0; i < character_count; i++, j++) + for (size_t j = 0; i < character_count; i++, j++) { // Truncated table as this iteration is always partital static const uint8_t tbits[4] { 2, 2, 1, 2 }; @@ -580,14 +580,14 @@ void encode_ise( // Write out quints and bits else if (quints) { - unsigned int i = 0; - unsigned int full_quint_blocks = character_count / 3; + size_t i = 0; + size_t full_quint_blocks = character_count / 3; - for (unsigned int j = 0; j < full_quint_blocks; j++) + for (size_t j = 0; j < full_quint_blocks; j++) { - unsigned int i2 = input_data[i + 2] >> bits; - unsigned int i1 = input_data[i + 1] >> bits; - unsigned int i0 = input_data[i + 0] >> bits; + size_t i2 = input_data[i + 2] >> bits; + size_t i1 = input_data[i + 1] >> bits; + size_t i0 = input_data[i + 0] >> bits; uint8_t T = integer_of_quints[i2][i1][i0]; @@ -616,13 +616,13 @@ void encode_ise( { // i2 cannot be present - we know the block is partial // i0 must be present - we know the block isn't empty - unsigned int i2 = 0; - unsigned int i1 = i + 1 >= character_count ? 0 : input_data[i + 1] >> bits; - unsigned int i0 = input_data[i + 0] >> bits; + size_t i2 = 0; + size_t i1 = i + 1 >= character_count ? 0 : input_data[i + 1] >> bits; + size_t i0 = input_data[i + 0] >> bits; uint8_t T = integer_of_quints[i2][i1][i0]; - for (unsigned int j = 0; i < character_count; i++, j++) + for (size_t j = 0; i < character_count; i++, j++) { // Truncated table as this iteration is always partital static const uint8_t tbits[2] { 3, 2 }; @@ -639,7 +639,7 @@ void encode_ise( // Write out just bits else { - for (unsigned int i = 0; i < character_count; i++) + for (size_t i = 0; i < character_count; i++) { write_bits(input_data[i], bits, bit_offset, output_data); bit_offset += bits; @@ -650,10 +650,10 @@ void encode_ise( /* See header for documentation. */ void decode_ise( quant_method quant_level, - unsigned int character_count, + size_t character_count, const uint8_t* input_data, uint8_t* output_data, - unsigned int bit_offset + size_t bit_offset ) { promise(character_count > 0); @@ -663,15 +663,15 @@ void decode_ise( uint8_t results[68]; uint8_t tq_blocks[22] { 0 }; // Trit-blocks or quint-blocks, must be zeroed - unsigned int bits = btq_counts[quant_level].bits; - unsigned int trits = btq_counts[quant_level].trits; - unsigned int quints = btq_counts[quant_level].quints; + size_t bits = btq_counts[quant_level].bits; + size_t trits = btq_counts[quant_level].trits; + size_t quints = btq_counts[quant_level].quints; - unsigned int lcounter = 0; - unsigned int hcounter = 0; + size_t lcounter = 0; + size_t hcounter = 0; // Collect bits for each element, as well as bits for any trit-blocks and quint-blocks. 
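The sequence cost returned by get_ise_sequence_bitcount follows directly from the packing shown above: five trit-encoded values share one 8-bit packed trit quintuple plus five lots of raw payload bits, and three quint-encoded values share one 7-bit packed quint triple plus three lots of payload, with partial blocks charged pro-rata and rounded up over the whole sequence. A standalone sketch of that arithmetic, assuming the standard ASTC ISE costs; the helper name is illustrative.

#include <cstddef>
#include <cstdio>

// Bits needed to BISE-encode 'count' values that each carry 'bits' raw bits
// plus an optional trit (base 3) or quint (base 5) component.
static size_t ise_bitcount(size_t count, size_t bits, bool trits, bool quints)
{
    if (trits)  // 8 bits per 5 trits, rounded up across the sequence
        return count * bits + (count * 8 + 4) / 5;
    if (quints) // 7 bits per 3 quints, rounded up across the sequence
        return count * bits + (count * 7 + 2) / 3;
    return count * bits;
}

int main()
{
    // e.g. 10 weights at QUANT_6 (1 bit + trit): 10*1 + ceil(80/5) = 26 bits
    std::printf("%zu\n", ise_bitcount(10, 1, true, false));
    return 0;
}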
- for (unsigned int i = 0; i < character_count; i++) + for (size_t i = 0; i < character_count; i++) { results[i] = static_cast(read_bits(bits, bit_offset, input_data)); bit_offset += bits; @@ -682,7 +682,7 @@ void decode_ise( static const uint8_t block_shift[5] { 0, 2, 4, 5, 7 }; static const uint8_t next_lcounter[5] { 1, 2, 3, 4, 0 }; static const uint8_t hcounter_incr[5] { 0, 0, 0, 0, 1 }; - unsigned int tdata = read_bits(bits_to_read[lcounter], bit_offset, input_data); + size_t tdata = read_bits(bits_to_read[lcounter], bit_offset, input_data); bit_offset += bits_to_read[lcounter]; tq_blocks[hcounter] |= tdata << block_shift[lcounter]; hcounter += hcounter_incr[lcounter]; @@ -695,7 +695,7 @@ void decode_ise( static const uint8_t block_shift[3] { 0, 3, 5 }; static const uint8_t next_lcounter[3] { 1, 2, 0 }; static const uint8_t hcounter_incr[3] { 0, 0, 1 }; - unsigned int tdata = read_bits(bits_to_read[lcounter], bit_offset, input_data); + size_t tdata = read_bits(bits_to_read[lcounter], bit_offset, input_data); bit_offset += bits_to_read[lcounter]; tq_blocks[hcounter] |= tdata << block_shift[lcounter]; hcounter += hcounter_incr[lcounter]; @@ -706,9 +706,9 @@ void decode_ise( // Unpack trit-blocks or quint-blocks as needed if (trits) { - unsigned int trit_blocks = (character_count + 4) / 5; + size_t trit_blocks = (character_count + 4) / 5; promise(trit_blocks > 0); - for (unsigned int i = 0; i < trit_blocks; i++) + for (size_t i = 0; i < trit_blocks; i++) { const uint8_t *tritptr = trits_of_integer[tq_blocks[i]]; results[5 * i ] |= tritptr[0] << bits; @@ -721,9 +721,9 @@ void decode_ise( if (quints) { - unsigned int quint_blocks = (character_count + 2) / 3; + size_t quint_blocks = (character_count + 2) / 3; promise(quint_blocks > 0); - for (unsigned int i = 0; i < quint_blocks; i++) + for (size_t i = 0; i < quint_blocks; i++) { const uint8_t *quintptr = quints_of_integer[tq_blocks[i]]; results[3 * i ] |= quintptr[0] << bits; @@ -732,7 +732,7 @@ void decode_ise( } } - for (unsigned int i = 0; i < character_count; i++) + for (size_t i = 0; i < character_count; i++) { output_data[i] = results[i]; } diff --git a/Source/astcenc_internal.h b/Source/astcenc_internal.h index 008e1f5f..bed460ee 100644 --- a/Source/astcenc_internal.h +++ b/Source/astcenc_internal.h @@ -69,31 +69,31 @@ #endif /** @brief The maximum number of texels a block can support (6x6x6 block). */ -static constexpr unsigned int BLOCK_MAX_TEXELS { ASTCENC_BLOCK_MAX_TEXELS }; +static constexpr size_t BLOCK_MAX_TEXELS { ASTCENC_BLOCK_MAX_TEXELS }; /** @brief The maximum number of components a block can support. */ -static constexpr unsigned int BLOCK_MAX_COMPONENTS { 4 }; +static constexpr size_t BLOCK_MAX_COMPONENTS { 4 }; /** @brief The maximum number of partitions a block can support. */ -static constexpr unsigned int BLOCK_MAX_PARTITIONS { 4 }; +static constexpr size_t BLOCK_MAX_PARTITIONS { 4 }; /** @brief The number of partitionings, per partition count, suported by the ASTC format. */ -static constexpr unsigned int BLOCK_MAX_PARTITIONINGS { 1024 }; +static constexpr size_t BLOCK_MAX_PARTITIONINGS { 1024 }; /** @brief The maximum number of texels used during partition selection for texel clustering. */ static constexpr uint8_t BLOCK_MAX_KMEANS_TEXELS { 64 }; /** @brief The maximum number of weights a block can support. */ -static constexpr unsigned int BLOCK_MAX_WEIGHTS { 64 }; +static constexpr size_t BLOCK_MAX_WEIGHTS { 64 }; /** @brief The maximum number of weights a block can support per plane in 2 plane mode. 
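The two-byte write_bits/read_bits primitives used by encode_ise and decode_ise above address the stream by absolute bit offset, so a field of up to eight bits may straddle a byte boundary. A standalone round-trip sketch of the same scheme, kept outside the codec with illustrative names.

#include <cstdint>
#include <cstdio>
#include <cstring>

// Write the 'bitcount' (<= 8) low bits of 'value' at absolute bit offset
// 'bitoffset'; the field may straddle two adjacent bytes.
static void put_bits(unsigned value, unsigned bitcount, unsigned bitoffset, uint8_t* ptr)
{
    unsigned mask = (1u << bitcount) - 1;
    value &= mask;
    ptr += bitoffset >> 3;
    bitoffset &= 7;
    value <<= bitoffset;
    mask <<= bitoffset;
    ptr[0] = static_cast<uint8_t>((ptr[0] & ~mask) | (value & 0xFF));
    ptr[1] = static_cast<uint8_t>((ptr[1] & ~(mask >> 8)) | (value >> 8));
}

static unsigned get_bits(unsigned bitcount, unsigned bitoffset, const uint8_t* ptr)
{
    unsigned mask = (1u << bitcount) - 1;
    ptr += bitoffset >> 3;
    bitoffset &= 7;
    unsigned value = ptr[0] | (unsigned(ptr[1]) << 8);
    return (value >> bitoffset) & mask;
}

int main()
{
    uint8_t buf[4];
    std::memset(buf, 0, sizeof(buf));
    put_bits(0x5, 3, 6, buf);                  // straddles bytes 0 and 1
    std::printf("%u\n", get_bits(3, 6, buf));  // prints 5
    return 0;
}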
*/ -static constexpr unsigned int BLOCK_MAX_WEIGHTS_2PLANE { BLOCK_MAX_WEIGHTS / 2 }; +static constexpr size_t BLOCK_MAX_WEIGHTS_2PLANE { BLOCK_MAX_WEIGHTS / 2 }; /** @brief The minimum number of weight bits a candidate encoding must encode. */ -static constexpr unsigned int BLOCK_MIN_WEIGHT_BITS { 24 }; +static constexpr size_t BLOCK_MIN_WEIGHT_BITS { 24 }; /** @brief The maximum number of weight bits a candidate encoding can encode. */ -static constexpr unsigned int BLOCK_MAX_WEIGHT_BITS { 96 }; +static constexpr size_t BLOCK_MAX_WEIGHT_BITS { 96 }; /** @brief The index indicating a bad (unused) block mode in the remap array. */ static constexpr uint16_t BLOCK_BAD_BLOCK_MODE { 0xFFFFu }; @@ -102,19 +102,19 @@ static constexpr uint16_t BLOCK_BAD_BLOCK_MODE { 0xFFFFu }; static constexpr uint16_t BLOCK_BAD_PARTITIONING { 0xFFFFu }; /** @brief The number of partition index bits supported by the ASTC format . */ -static constexpr unsigned int PARTITION_INDEX_BITS { 10 }; +static constexpr size_t PARTITION_INDEX_BITS { 10 }; /** @brief The offset of the plane 2 weights in shared weight arrays. */ -static constexpr unsigned int WEIGHTS_PLANE2_OFFSET { BLOCK_MAX_WEIGHTS_2PLANE }; +static constexpr size_t WEIGHTS_PLANE2_OFFSET { BLOCK_MAX_WEIGHTS_2PLANE }; /** @brief The sum of quantized weights for one texel. */ static constexpr float WEIGHTS_TEXEL_SUM { 16.0f }; /** @brief The number of block modes supported by the ASTC format. */ -static constexpr unsigned int WEIGHTS_MAX_BLOCK_MODES { 2048 }; +static constexpr size_t WEIGHTS_MAX_BLOCK_MODES { 2048 }; /** @brief The number of weight grid decimation modes supported by the ASTC format. */ -static constexpr unsigned int WEIGHTS_MAX_DECIMATION_MODES { 87 }; +static constexpr size_t WEIGHTS_MAX_DECIMATION_MODES { 87 }; /** @brief The high default error used to initialize error trackers. */ static constexpr float ERROR_CALC_DEFAULT { 1e30f }; @@ -129,14 +129,14 @@ static constexpr float TUNE_MIN_SEARCH_MODE0 { 0.85f }; * * This can be dynamically reduced by the compression quality preset. */ -static constexpr unsigned int TUNE_MAX_TRIAL_CANDIDATES { 8 }; +static constexpr size_t TUNE_MAX_TRIAL_CANDIDATES { 8 }; /** * @brief The maximum number of candidate partitionings tested for each encoding mode. * * This can be dynamically reduced by the compression quality preset. */ -static constexpr unsigned int TUNE_MAX_PARTITIONING_CANDIDATES { 8 }; +static constexpr size_t TUNE_MAX_PARTITIONING_CANDIDATES { 8 }; /** * @brief The maximum quant level using full angular endpoint search method. @@ -151,7 +151,7 @@ static constexpr unsigned int TUNE_MAX_PARTITIONING_CANDIDATES { 8 }; * one 8-wide vector. Decreasing by one doesn't buy much performance, and * increasing by one is disproportionately expensive. */ -static constexpr unsigned int TUNE_MAX_ANGULAR_QUANT { 7 }; /* QUANT_12 */ +static constexpr size_t TUNE_MAX_ANGULAR_QUANT { 7 }; /* QUANT_12 */ static_assert((BLOCK_MAX_TEXELS % ASTCENC_SIMD_WIDTH) == 0, "BLOCK_MAX_TEXELS must be multiple of ASTCENC_SIMD_WIDTH"); @@ -232,7 +232,7 @@ enum quant_method * * @return The number of levels used by @c method. */ -static inline unsigned int get_quant_level(quant_method method) +static inline size_t get_quant_level(quant_method method) { switch (method) { @@ -546,35 +546,35 @@ struct block_size_descriptor * * Always modes are stored at the start of the decimation_modes list. 
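WEIGHTS_PLANE2_OFFSET above documents the layout used in two-plane mode: both planes share one fixed weight array, plane 1 in the first half and plane 2 starting at the half-way offset. A minimal sketch of that layout, assuming the 64-weight limit shown; the accessor names are illustrative.

#include <cstddef>
#include <cstdint>

// Two-plane weight storage: both planes live in one fixed array, plane 2
// starting at the half-way offset, mirroring WEIGHTS_PLANE2_OFFSET above.
constexpr size_t MAX_WEIGHTS        = 64;
constexpr size_t MAX_WEIGHTS_2PLANE = MAX_WEIGHTS / 2;
constexpr size_t PLANE2_OFFSET      = MAX_WEIGHTS_2PLANE;

struct weight_store
{
    uint8_t weights[MAX_WEIGHTS];

    uint8_t plane1(size_t i) const { return weights[i]; }
    uint8_t plane2(size_t i) const { return weights[PLANE2_OFFSET + i]; }
};

static_assert(PLANE2_OFFSET + MAX_WEIGHTS_2PLANE == MAX_WEIGHTS,
              "plane 2 weights must fit inside the shared array");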
*/ - unsigned int decimation_mode_count_always; + size_t decimation_mode_count_always; /** @brief The number of stored decimation modes for selected encodings. */ - unsigned int decimation_mode_count_selected; + size_t decimation_mode_count_selected; /** @brief The number of stored decimation modes for any encoding. */ - unsigned int decimation_mode_count_all; + size_t decimation_mode_count_all; /** * @brief The number of stored block modes which are "always" modes. * * Always modes are stored at the start of the block_modes list. */ - unsigned int block_mode_count_1plane_always; + size_t block_mode_count_1plane_always; /** @brief The number of stored block modes for active 1 plane encodings. */ - unsigned int block_mode_count_1plane_selected; + size_t block_mode_count_1plane_selected; /** @brief The number of stored block modes for active 1 and 2 plane encodings. */ - unsigned int block_mode_count_1plane_2plane_selected; + size_t block_mode_count_1plane_2plane_selected; /** @brief The number of stored block modes for any encoding. */ - unsigned int block_mode_count_all; + size_t block_mode_count_all; /** @brief The number of selected partitionings for 1/2/3/4 partitionings. */ - unsigned int partitioning_count_selected[BLOCK_MAX_PARTITIONS]; + size_t partitioning_count_selected[BLOCK_MAX_PARTITIONS]; /** @brief The number of partitionings for 1/2/3/4 partitionings. */ - unsigned int partitioning_count_all[BLOCK_MAX_PARTITIONS]; + size_t partitioning_count_all[BLOCK_MAX_PARTITIONS]; /** @brief The active decimation modes, stored in low indices. */ decimation_mode decimation_modes[WEIGHTS_MAX_DECIMATION_MODES]; @@ -633,9 +633,9 @@ struct block_size_descriptor * * @return The block mode structure. */ - const block_mode& get_block_mode(unsigned int block_mode) const + const block_mode& get_block_mode(size_t block_mode) const { - unsigned int packed_index = this->block_mode_packed_index[block_mode]; + size_t packed_index = this->block_mode_packed_index[block_mode]; assert(packed_index != BLOCK_BAD_BLOCK_MODE && packed_index < this->block_mode_count_all); return this->block_modes[packed_index]; } @@ -651,7 +651,7 @@ struct block_size_descriptor * * @return The decimation mode structure. */ - const decimation_mode& get_decimation_mode(unsigned int decimation_mode) const + const decimation_mode& get_decimation_mode(size_t decimation_mode) const { return this->decimation_modes[decimation_mode]; } @@ -667,7 +667,7 @@ struct block_size_descriptor * * @return The decimation info structure. */ - const decimation_info& get_decimation_info(unsigned int decimation_mode) const + const decimation_info& get_decimation_info(size_t decimation_mode) const { return this->decimation_tables[decimation_mode]; } @@ -679,13 +679,13 @@ struct block_size_descriptor * * @return The pointer to the table of 1024 entries (for 2/3/4 parts) or 1 entry (for 1 part). */ - const partition_info* get_partition_table(unsigned int partition_count) const + const partition_info* get_partition_table(size_t partition_count) const { if (partition_count == 1) { partition_count = 5; } - unsigned int index = (partition_count - 2) * BLOCK_MAX_PARTITIONINGS; + size_t index = (partition_count - 2) * BLOCK_MAX_PARTITIONINGS; return this->partitionings + index; } @@ -697,9 +697,9 @@ struct block_size_descriptor * * @return The partition info structure. 
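The get_block_mode() accessor above uses a sparse-to-dense remap: the full block mode space indexes a packed_index table, a sentinel (BLOCK_BAD_BLOCK_MODE) marks modes this block size never stores, and valid entries index the dense block_modes array. A small standalone model of that pattern, with illustrative names and an int payload standing in for the block_mode struct.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Sparse-to-dense remap in the style of get_block_mode() above.
constexpr uint16_t BAD_INDEX = 0xFFFFu;

struct packed_modes
{
    std::vector<uint16_t> remap;   // indexed by raw block mode, 0..2047
    std::vector<int>      packed;  // dense storage of the kept modes

    const int& get(size_t raw_mode) const
    {
        uint16_t packed_index = remap[raw_mode];
        assert(packed_index != BAD_INDEX && packed_index < packed.size());
        return packed[packed_index];
    }
};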
*/ - const partition_info& get_partition_info(unsigned int partition_count, unsigned int index) const + const partition_info& get_partition_info(size_t partition_count, size_t index) const { - unsigned int packed_index = 0; + size_t packed_index = 0; if (partition_count >= 2) { packed_index = this->partitioning_packed_index[partition_count - 2][index]; @@ -719,7 +719,7 @@ struct block_size_descriptor * * @return The partition info structure. */ - const partition_info& get_raw_partition_info(unsigned int partition_count, unsigned int packed_index) const + const partition_info& get_raw_partition_info(size_t partition_count, size_t packed_index) const { assert(packed_index != BLOCK_BAD_PARTITIONING && packed_index < this->partitioning_count_all[partition_count - 1]); auto& result = get_partition_table(partition_count)[packed_index]; @@ -786,13 +786,13 @@ struct image_block uint8_t alpha_lns[BLOCK_MAX_TEXELS]; /** @brief The X position of this block in the input or output image. */ - unsigned int xpos; + size_t xpos; /** @brief The Y position of this block in the input or output image. */ - unsigned int ypos; + size_t ypos; /** @brief The Z position of this block in the input or output image. */ - unsigned int zpos; + size_t zpos; /** * @brief Get an RGBA texel value from the data. @@ -801,7 +801,7 @@ struct image_block * * @return The texel in RGBA component ordering. */ - inline vfloat4 texel(unsigned int index) const + inline vfloat4 texel(size_t index) const { return vfloat4(data_r[index], data_g[index], @@ -816,7 +816,7 @@ struct image_block * * @return The texel in RGB0 component ordering. */ - inline vfloat4 texel3(unsigned int index) const + inline vfloat4 texel3(size_t index) const { return vfloat3(data_r[index], data_g[index], @@ -884,7 +884,7 @@ struct image_block struct endpoints { /** @brief The number of partition endpoints stored. */ - unsigned int partition_count; + size_t partition_count; /** @brief The colors for endpoint 0. */ vfloat4 endpt0[BLOCK_MAX_PARTITIONS]; @@ -1146,25 +1146,25 @@ struct pixel_region_args bool have_z; /** @brief The kernel radius for alpha processing. */ - unsigned int alpha_kernel_radius; + size_t alpha_kernel_radius; /** @brief The X dimension of the working data to process. */ - unsigned int size_x; + size_t size_x; /** @brief The Y dimension of the working data to process. */ - unsigned int size_y; + size_t size_y; /** @brief The Z dimension of the working data to process. */ - unsigned int size_z; + size_t size_z; /** @brief The X position of first src and dst data in the data set. */ - unsigned int offset_x; + size_t offset_x; /** @brief The Y position of first src and dst data in the data set. */ - unsigned int offset_y; + size_t offset_y; /** @brief The Z position of first src and dst data in the data set. */ - unsigned int offset_z; + size_t offset_z; /** @brief The working memory buffer. */ vfloat4 *work_memory; @@ -1179,22 +1179,22 @@ struct avg_args pixel_region_args arg; /** @brief The image X dimensions. */ - unsigned int img_size_x; + size_t img_size_x; /** @brief The image Y dimensions. */ - unsigned int img_size_y; + size_t img_size_y; /** @brief The image Z dimensions. */ - unsigned int img_size_z; + size_t img_size_z; /** @brief The maximum working block dimensions in X and Y dimensions. */ - unsigned int blk_size_xy; + size_t blk_size_xy; /** @brief The maximum working block dimensions in Z dimensions. */ - unsigned int blk_size_z; + size_t blk_size_z; /** @brief The working block memory size. 
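The partition table addressing shown in get_partition_table() above stores the 2/3/4 partition tables back to back, 1024 entries each, and keeps the single 1-partition entry immediately after them by rewriting the count to 5. A compile-time sketch of that index arithmetic; the function name is illustrative.

#include <cstddef>

// Index arithmetic mirroring get_partition_table() above.
constexpr size_t MAX_PARTITIONINGS = 1024;

constexpr size_t partition_table_offset(size_t partition_count)
{
    if (partition_count == 1)
    {
        partition_count = 5;
    }
    return (partition_count - 2) * MAX_PARTITIONINGS;
}

static_assert(partition_table_offset(2) == 0,    "2-partition table comes first");
static_assert(partition_table_offset(4) == 2048, "4-partition table comes third");
static_assert(partition_table_offset(1) == 3072, "1-partition entry sits last");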
*/ - unsigned int work_memory_size; + size_t work_memory_size; }; #if defined(ASTCENC_DIAGNOSTICS) @@ -1211,7 +1211,7 @@ struct astcenc_contexti astcenc_config config; /** @brief The thread count supported by this context. */ - unsigned int thread_count; + size_t thread_count; /** @brief The block size descriptor this context was created with. */ block_size_descriptor* bsd; @@ -1263,11 +1263,11 @@ struct astcenc_contexti * @param[out] bsd The descriptor to initialize. */ void init_block_size_descriptor( - unsigned int x_texels, - unsigned int y_texels, - unsigned int z_texels, + size_t x_texels, + size_t y_texels, + size_t z_texels, bool can_omit_modes, - unsigned int partition_count_cutoff, + size_t partition_count_cutoff, float mode_cutoff, block_size_descriptor& bsd); @@ -1284,7 +1284,7 @@ void init_block_size_descriptor( void init_partition_tables( block_size_descriptor& bsd, bool can_omit_partitionings, - unsigned int partition_count_cutoff); + size_t partition_count_cutoff); /** * @brief Get the percentile table for 2D block modes. @@ -1300,8 +1300,8 @@ void init_partition_tables( * @return The unpacked table. */ const float* get_2d_percentile_table( - unsigned int xdim, - unsigned int ydim); + size_t xdim, + size_t ydim); /** * @brief Query if a 2D block size is legal. @@ -1309,8 +1309,8 @@ const float* get_2d_percentile_table( * @return True if legal, false otherwise. */ bool is_legal_2d_block_size( - unsigned int xdim, - unsigned int ydim); + size_t xdim, + size_t ydim); /** * @brief Query if a 3D block size is legal. @@ -1318,9 +1318,9 @@ bool is_legal_2d_block_size( * @return True if legal, false otherwise. */ bool is_legal_3d_block_size( - unsigned int xdim, - unsigned int ydim, - unsigned int zdim); + size_t xdim, + size_t ydim, + size_t zdim); /* ============================================================================ Functionality for managing BISE quantization and unquantization. @@ -1384,10 +1384,10 @@ extern const int8_t quant_mode_table[10][128]; */ void encode_ise( quant_method quant_level, - unsigned int character_count, + size_t character_count, const uint8_t* input_data, uint8_t* output_data, - unsigned int bit_offset); + size_t bit_offset); /** * @brief Decode a packed string using BISE. @@ -1403,10 +1403,10 @@ void encode_ise( */ void decode_ise( quant_method quant_level, - unsigned int character_count, + size_t character_count, const uint8_t* input_data, uint8_t* output_data, - unsigned int bit_offset); + size_t bit_offset); /** * @brief Return the number of bits needed to encode an ISE sequence. @@ -1419,8 +1419,8 @@ void decode_ise( * * @return The number of bits needed to encode the BISE string. 
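The image_block accessors earlier in astcenc_internal.h (texel() and texel3()) reflect the structure-of-arrays storage used throughout the compressor: each channel is a contiguous float array so SIMD loops can load whole lanes, and a single RGBA value is gathered only on demand. A minimal sketch of that shape, assuming the 6x6x6 texel limit; names and the plain struct return type are illustrative.

#include <array>
#include <cstddef>

constexpr size_t MAX_TEXELS = 216; // 6x6x6 block

struct rgba { float r, g, b, a; };

// Structure-of-arrays texel storage in the spirit of image_block above.
struct soa_block
{
    std::array<float, MAX_TEXELS> data_r, data_g, data_b, data_a;

    rgba texel(size_t index) const
    {
        return { data_r[index], data_g[index], data_b[index], data_a[index] };
    }
};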
*/ -unsigned int get_ise_sequence_bitcount( - unsigned int character_count, +size_t get_ise_sequence_bitcount( + size_t character_count, quant_method quant_level); /* ============================================================================ @@ -1441,8 +1441,8 @@ unsigned int get_ise_sequence_bitcount( void compute_avgs_and_dirs_2_comp( const partition_info& pi, const image_block& blk, - unsigned int component1, - unsigned int component2, + size_t component1, + size_t component2, partition_metrics pm[BLOCK_MAX_PARTITIONS]); /** @@ -1458,7 +1458,7 @@ void compute_avgs_and_dirs_2_comp( void compute_avgs_and_dirs_3_comp( const partition_info& pi, const image_block& blk, - unsigned int omitted_component, + size_t omitted_component, partition_metrics pm[BLOCK_MAX_PARTITIONS]); /** @@ -1559,13 +1559,13 @@ void compute_error_squared_rgba( * * @return The actual number of candidates returned. */ -unsigned int find_best_partition_candidates( +size_t find_best_partition_candidates( const block_size_descriptor& bsd, const image_block& blk, - unsigned int partition_count, - unsigned int partition_search_limit, - unsigned int best_partitions[TUNE_MAX_PARTITIONING_CANDIDATES], - unsigned int requested_candidates); + size_t partition_count, + size_t partition_search_limit, + size_t best_partitions[TUNE_MAX_PARTITIONING_CANDIDATES], + size_t requested_candidates); /* ============================================================================ Functionality for managing images and image related data. @@ -1607,9 +1607,9 @@ static inline vmask4 get_u8_component_mask( * * @return The number of tasks in the processing stage. */ -unsigned int init_compute_averages( +size_t init_compute_averages( const astcenc_image& img, - unsigned int alpha_kernel_radius, + size_t alpha_kernel_radius, const astcenc_swizzle& swz, avg_args& ag); @@ -1642,9 +1642,9 @@ void load_image_block( const astcenc_image& img, image_block& blk, const block_size_descriptor& bsd, - unsigned int xpos, - unsigned int ypos, - unsigned int zpos, + size_t xpos, + size_t ypos, + size_t zpos, const astcenc_swizzle& swz); /** @@ -1667,9 +1667,9 @@ void load_image_block_fast_ldr( const astcenc_image& img, image_block& blk, const block_size_descriptor& bsd, - unsigned int xpos, - unsigned int ypos, - unsigned int zpos, + size_t xpos, + size_t ypos, + size_t zpos, const astcenc_swizzle& swz); /** @@ -1687,9 +1687,9 @@ void store_image_block( astcenc_image& img, const image_block& blk, const block_size_descriptor& bsd, - unsigned int xpos, - unsigned int ypos, - unsigned int zpos, + size_t xpos, + size_t ypos, + size_t zpos, const astcenc_swizzle& swz); /* ============================================================================ @@ -1728,7 +1728,7 @@ void compute_ideal_colors_and_weights_1plane( void compute_ideal_colors_and_weights_2planes( const block_size_descriptor& bsd, const image_block& blk, - unsigned int plane2_component, + size_t plane2_component, endpoints_and_weights& ei1, endpoints_and_weights& ei2); @@ -1935,15 +1935,15 @@ void unpack_weights( * * @return The actual number of candidate matches returned. 
*/ -unsigned int compute_ideal_endpoint_formats( +size_t compute_ideal_endpoint_formats( const partition_info& pi, const image_block& blk, const endpoints& ep, const int8_t* qwt_bitcounts, const float* qwt_errors, - unsigned int tune_candidate_limit, - unsigned int start_block_mode, - unsigned int end_block_mode, + size_t tune_candidate_limit, + size_t start_block_mode, + size_t end_block_mode, uint8_t partition_format_specifiers[TUNE_MAX_TRIAL_CANDIDATES][BLOCK_MAX_PARTITIONS], int block_mode[TUNE_MAX_TRIAL_CANDIDATES], quant_method quant_level[TUNE_MAX_TRIAL_CANDIDATES], @@ -2018,7 +2018,7 @@ void compute_angular_endpoints_1plane( bool only_always, const block_size_descriptor& bsd, const float* dec_weight_ideal_value, - unsigned int max_weight_quant, + size_t max_weight_quant, compression_working_buffers& tmpbuf); /** @@ -2032,7 +2032,7 @@ void compute_angular_endpoints_1plane( void compute_angular_endpoints_2planes( const block_size_descriptor& bsd, const float* dec_weight_ideal_value, - unsigned int max_weight_quant, + size_t max_weight_quant, compression_working_buffers& tmpbuf); /* ============================================================================ diff --git a/Source/astcenc_internal_entry.h b/Source/astcenc_internal_entry.h index 966c1d31..32ab8e5b 100644 --- a/Source/astcenc_internal_entry.h +++ b/Source/astcenc_internal_entry.h @@ -113,13 +113,13 @@ class ParallelManager std::condition_variable m_complete; /** @brief Number of tasks started, but not necessarily finished. */ - std::atomic m_start_count; + std::atomic m_start_count; /** @brief Number of tasks finished. */ - unsigned int m_done_count; + size_t m_done_count; /** @brief Number of tasks that need to be processed. */ - unsigned int m_task_count; + size_t m_task_count; /** @brief Progress callback (optional). */ astcenc_progress_callback m_callback; @@ -178,7 +178,7 @@ class ParallelManager * @param init_func Callable which executes the stage initialization. It must return the * total number of tasks in the stage. */ - void init(std::function init_func) + void init(std::function init_func) { std::lock_guard lck(m_lock); if (!m_init_done) @@ -197,7 +197,7 @@ class ParallelManager * @param task_count Total number of tasks needing processing. * @param callback Function pointer for progress status callbacks. */ - void init(unsigned int task_count, astcenc_progress_callback callback) + void init(size_t task_count, astcenc_progress_callback callback) { std::lock_guard lck(m_lock); if (!m_init_done) @@ -222,9 +222,9 @@ class ParallelManager * * @return Task index of the first assigned task; assigned tasks increment from this. */ - unsigned int get_task_assignment(unsigned int granule, unsigned int& count) + size_t get_task_assignment(size_t granule, size_t& count) { - unsigned int base = m_start_count.fetch_add(granule, std::memory_order_relaxed); + size_t base = m_start_count.fetch_add(granule, std::memory_order_relaxed); if (m_is_cancelled || base >= m_task_count) { count = 0; @@ -243,11 +243,11 @@ class ParallelManager * * @param count The number of completed tasks. 
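ParallelManager::get_task_assignment() above hands out work by atomically advancing a shared start counter: each worker claims a granule of task indices with fetch_add, and a claim past the end yields a zero count so the worker can stop. A simplified standalone sketch of that idea; cancellation, progress callbacks, and completion tracking are omitted, and the class name is illustrative.

#include <algorithm>
#include <atomic>
#include <cstddef>

class task_splitter
{
public:
    explicit task_splitter(size_t task_count)
        : m_task_count(task_count), m_start_count(0) { }

    // Returns the first claimed task index; 'count' is how many were claimed.
    size_t claim(size_t granule, size_t& count)
    {
        size_t base = m_start_count.fetch_add(granule, std::memory_order_relaxed);
        if (base >= m_task_count)
        {
            count = 0;
            return 0;
        }
        count = std::min(granule, m_task_count - base);
        return base;
    }

private:
    size_t m_task_count;
    std::atomic<size_t> m_start_count;
};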
*/ - void complete_task_assignment(unsigned int count) + void complete_task_assignment(size_t count) { // Note: m_done_count cannot use an atomic without the mutex; this has a race between the // update here and the wait() for other threads - unsigned int local_count; + size_t local_count; float local_last_value; { std::unique_lock lck(m_lock); diff --git a/Source/astcenc_mathlib.h b/Source/astcenc_mathlib.h index 1d73bf1d..e0d9e848 100644 --- a/Source/astcenc_mathlib.h +++ b/Source/astcenc_mathlib.h @@ -131,6 +131,9 @@ typedef union float f; } if32; +// size_t integer literal +constexpr size_t operator "" _z ( unsigned long long n ) { return n; } + // These are namespaced to avoid colliding with C standard library functions. namespace astc { diff --git a/Source/astcenc_partition_tables.cpp b/Source/astcenc_partition_tables.cpp index cad42384..b349fe39 100644 --- a/Source/astcenc_partition_tables.cpp +++ b/Source/astcenc_partition_tables.cpp @@ -36,12 +36,12 @@ * @param[out] bit_pattern The output bit pattern representation. */ static void generate_canonical_partitioning( - unsigned int texel_count, + size_t texel_count, const uint8_t* partition_of_texel, uint64_t bit_pattern[BIT_PATTERN_WORDS] ) { // Clear the pattern - for (unsigned int i = 0; i < BIT_PATTERN_WORDS; i++) + for (size_t i = 0; i < BIT_PATTERN_WORDS; i++) { bit_pattern[i] = 0; } @@ -52,12 +52,12 @@ static void generate_canonical_partitioning( int mapped_index[BLOCK_MAX_PARTITIONS]; int map_weight_count = 0; - for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++) + for (size_t i = 0; i < BLOCK_MAX_PARTITIONS; i++) { mapped_index[i] = -1; } - for (unsigned int i = 0; i < texel_count; i++) + for (size_t i = 0; i < texel_count; i++) { int index = partition_of_texel[i]; if (mapped_index[index] < 0) @@ -275,9 +275,9 @@ static uint8_t select_partition( */ static bool generate_one_partition_info_entry( block_size_descriptor& bsd, - unsigned int partition_count, - unsigned int partition_index, - unsigned int partition_remap_index, + size_t partition_count, + size_t partition_index, + size_t partition_remap_index, partition_info& pi ) { int texels_per_block = bsd.texel_count; @@ -288,11 +288,11 @@ static bool generate_one_partition_info_entry( // Assign texels to partitions int texel_idx = 0; int counts[BLOCK_MAX_PARTITIONS] { 0 }; - for (unsigned int z = 0; z < bsd.zdim; z++) + for (size_t z = 0; z < bsd.zdim; z++) { - for (unsigned int y = 0; y < bsd.ydim; y++) + for (size_t y = 0; y < bsd.ydim; y++) { - for (unsigned int x = 0; x < bsd.xdim; x++) + for (size_t x = 0; x < bsd.xdim; x++) { uint8_t part = select_partition(partition_index, x, y, z, partition_count, small_block); pi.texels_of_partition[part][counts[part]++] = static_cast(texel_idx++); @@ -302,7 +302,7 @@ static bool generate_one_partition_info_entry( } // Fill loop tail so we can overfetch later - for (unsigned int i = 0; i < partition_count; i++) + for (size_t i = 0; i < partition_count; i++) { int ptex_count = counts[i]; int ptex_count_simd = round_up_to_simd_multiple_vla(ptex_count); @@ -352,7 +352,7 @@ static bool generate_one_partition_info_entry( bitmaps = bsd.coverage_bitmaps_4[partition_remap_index]; } - for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++) + for (size_t i = 0; i < BLOCK_MAX_PARTITIONS; i++) { pi.partition_texel_count[i] = static_cast(counts[i]); } @@ -363,15 +363,15 @@ static bool generate_one_partition_info_entry( if (bitmaps) { // Populate the partition coverage bitmap - for (unsigned int i = 0; i < partition_count; i++) + for (size_t i 
= 0; i < partition_count; i++) { bitmaps[i] = 0ULL; } - unsigned int texels_to_process = astc::min(bsd.texel_count, BLOCK_MAX_KMEANS_TEXELS); - for (unsigned int i = 0; i < texels_to_process; i++) + size_t texels_to_process = astc::min(bsd.texel_count, BLOCK_MAX_KMEANS_TEXELS); + for (size_t i = 0; i < texels_to_process; i++) { - unsigned int idx = bsd.kmeans_texels[i]; + size_t idx = bsd.kmeans_texels[i]; bitmaps[pi.partition_of_texel[idx]] |= 1ULL << i; } } @@ -382,12 +382,12 @@ static bool generate_one_partition_info_entry( static void build_partition_table_for_one_partition_count( block_size_descriptor& bsd, bool can_omit_partitionings, - unsigned int partition_count_cutoff, - unsigned int partition_count, + size_t partition_count_cutoff, + size_t partition_count, partition_info* ptab, uint64_t* canonical_patterns ) { - unsigned int next_index = 0; + size_t next_index = 0; bsd.partitioning_count_selected[partition_count - 1] = 0; bsd.partitioning_count_all[partition_count - 1] = 0; @@ -400,13 +400,13 @@ static void build_partition_table_for_one_partition_count( // Iterate through twice // - Pass 0: Keep selected partitionings // - Pass 1: Keep non-selected partitionings (skip if in omit mode) - unsigned int max_iter = can_omit_partitionings ? 1 : 2; + size_t max_iter = can_omit_partitionings ? 1 : 2; // Tracker for things we built in the first iteration uint8_t build[BLOCK_MAX_PARTITIONINGS] { 0 }; - for (unsigned int x = 0; x < max_iter; x++) + for (size_t x = 0; x < max_iter; x++) { - for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONINGS; i++) + for (size_t i = 0; i < BLOCK_MAX_PARTITIONINGS; i++) { // Don't include things we built in the first pass if ((x == 1) && build[i]) @@ -422,7 +422,7 @@ static void build_partition_table_for_one_partition_count( generate_canonical_partitioning(bsd.texel_count, ptab[next_index].partition_of_texel, canonical_patterns + next_index * BIT_PATTERN_WORDS); bool keep_canonical = true; - for (unsigned int j = 0; j < next_index; j++) + for (size_t j = 0; j < next_index; j++) { bool match = compare_canonical_partitionings(canonical_patterns + next_index * BIT_PATTERN_WORDS, canonical_patterns + j * BIT_PATTERN_WORDS); if (match) @@ -460,7 +460,7 @@ static void build_partition_table_for_one_partition_count( void init_partition_tables( block_size_descriptor& bsd, bool can_omit_partitionings, - unsigned int partition_count_cutoff + size_t partition_count_cutoff ) { partition_info* par_tab2 = bsd.partitionings; partition_info* par_tab3 = par_tab2 + BLOCK_MAX_PARTITIONINGS; diff --git a/Source/astcenc_percentile_tables.cpp b/Source/astcenc_percentile_tables.cpp index 448ddcc9..952e91ed 100644 --- a/Source/astcenc_percentile_tables.cpp +++ b/Source/astcenc_percentile_tables.cpp @@ -1163,14 +1163,14 @@ static const packed_percentile_table *get_packed_table( /* See header for documentation. 
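The deduplication above relies on canonical partitionings: generate_canonical_partitioning() renumbers partition labels in order of first appearance, so two partitionings that differ only by a permutation of the labels compare equal and the duplicate is dropped. A sketch of just the relabeling step, assuming labels in the range 0..3 (BLOCK_MAX_PARTITIONS); the real code additionally packs the result into 2-bit-per-texel bit patterns for fast comparison.

#include <array>
#include <cstddef>
#include <cstdint>

// Relabel partition indices in order of first appearance.
template <size_t N>
std::array<uint8_t, N> canonicalize(const std::array<uint8_t, N>& labels)
{
    std::array<int, 4> remap { -1, -1, -1, -1 };
    int next_label = 0;

    std::array<uint8_t, N> out {};
    for (size_t i = 0; i < N; i++)
    {
        int& slot = remap[labels[i]];
        if (slot < 0)
        {
            slot = next_label++;
        }
        out[i] = static_cast<uint8_t>(slot);
    }
    return out;
}

// canonicalize({1,1,0,2}) and canonicalize({0,0,2,1}) both yield {0,0,1,2}.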
*/ const float *get_2d_percentile_table( - unsigned int xdim, - unsigned int ydim + size_t xdim, + size_t ydim ) { float* unpacked_table = new float[WEIGHTS_MAX_BLOCK_MODES]; const packed_percentile_table *apt = get_packed_table(xdim, ydim); // Set the default percentile - for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++) + for (size_t i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++) { unpacked_table[i] = 1.0f; } @@ -1178,16 +1178,16 @@ const float *get_2d_percentile_table( // Populate the unpacked percentile values for (int i = 0; i < 2; i++) { - unsigned int itemcount = apt->item_count[i]; - unsigned int difscale = apt->difscales[i]; - unsigned int accum = apt->initial_percs[i]; + size_t itemcount = apt->item_count[i]; + size_t difscale = apt->difscales[i]; + size_t accum = apt->initial_percs[i]; const uint16_t *item_ptr = apt->items[i]; - for (unsigned int j = 0; j < itemcount; j++) + for (size_t j = 0; j < itemcount; j++) { uint16_t item = item_ptr[j]; - unsigned int idx = item & 0x7FF; - unsigned int weight = (item >> 11) & 0x1F; + size_t idx = item & 0x7FF; + size_t weight = (item >> 11) & 0x1F; accum += weight; unpacked_table[idx] = static_cast(accum) / static_cast(difscale); } @@ -1199,10 +1199,10 @@ const float *get_2d_percentile_table( /* See header for documentation. */ bool is_legal_2d_block_size( - unsigned int xdim, - unsigned int ydim + size_t xdim, + size_t ydim ) { - unsigned int idx = (xdim << 8) | ydim; + size_t idx = (xdim << 8) | ydim; switch (idx) { case 0x0404: @@ -1227,11 +1227,11 @@ bool is_legal_2d_block_size( /* See header for documentation. */ bool is_legal_3d_block_size( - unsigned int xdim, - unsigned int ydim, - unsigned int zdim + size_t xdim, + size_t ydim, + size_t zdim ) { - unsigned int idx = (xdim << 16) | (ydim << 8) | zdim; + size_t idx = (xdim << 16) | (ydim << 8) | zdim; switch (idx) { case 0x030303: diff --git a/Source/astcenc_pick_best_endpoint_format.cpp b/Source/astcenc_pick_best_endpoint_format.cpp index bf872a92..ae172be3 100644 --- a/Source/astcenc_pick_best_endpoint_format.cpp +++ b/Source/astcenc_pick_best_endpoint_format.cpp @@ -85,7 +85,7 @@ static void compute_error_squared_rgb_single_partition( ) { vfloat4 ews = blk.channel_weight; - unsigned int texel_count = pi.partition_texel_count[partition_index]; + size_t texel_count = pi.partition_texel_count[partition_index]; const uint8_t* texel_indexes = pi.texels_of_partition[partition_index]; promise(texel_count > 0); @@ -121,7 +121,7 @@ static void compute_error_squared_rgb_single_partition( vfloat l_bs2(l_pline.bs.lane<2>()); vint lane_ids = vint::lane_id(); - for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH) { const uint8_t* tix = texel_indexes + i; @@ -1087,16 +1087,16 @@ static float four_partitions_find_best_combination_for_bitcount( } /* See header for documentation. 
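is_legal_2d_block_size() above packs the two dimensions into a single switch key so the fourteen legal ASTC 2D footprints can be matched in one jump table. A self-contained sketch of the same test, using the standard ASTC 2D block sizes; the function name is illustrative.

#include <cstddef>

constexpr bool legal_2d_block(size_t xdim, size_t ydim)
{
    switch ((xdim << 8) | ydim)
    {
    case 0x0404: case 0x0504: case 0x0505: case 0x0605: case 0x0606:
    case 0x0805: case 0x0806: case 0x0808: case 0x0A05: case 0x0A06:
    case 0x0A08: case 0x0A0A: case 0x0C0A: case 0x0C0C:
        return true;   // 4x4 .. 12x12, the 14 legal 2D footprints
    default:
        return false;
    }
}

static_assert(legal_2d_block(8, 6),  "8x6 is a legal ASTC 2D block");
static_assert(!legal_2d_block(7, 7), "7x7 is not");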
*/ -unsigned int compute_ideal_endpoint_formats( +size_t compute_ideal_endpoint_formats( const partition_info& pi, const image_block& blk, const endpoints& ep, // bitcounts and errors computed for the various quantization methods const int8_t* qwt_bitcounts, const float* qwt_errors, - unsigned int tune_candidate_limit, - unsigned int start_block_mode, - unsigned int end_block_mode, + size_t tune_candidate_limit, + size_t start_block_mode, + size_t end_block_mode, // output data uint8_t partition_format_specifiers[TUNE_MAX_TRIAL_CANDIDATES][BLOCK_MAX_PARTITIONS], int block_mode[TUNE_MAX_TRIAL_CANDIDATES], @@ -1135,13 +1135,13 @@ unsigned int compute_ideal_endpoint_formats( vfloat clear_error(ERROR_CALC_DEFAULT); vint clear_quant(0); - unsigned int packed_start_block_mode = round_down_to_simd_multiple_vla(start_block_mode); + size_t packed_start_block_mode = round_down_to_simd_multiple_vla(start_block_mode); storea(clear_error, errors_of_best_combination + packed_start_block_mode); store_nbytes(clear_quant, best_quant_levels + packed_start_block_mode); store_nbytes(clear_quant, best_quant_levels_mod + packed_start_block_mode); // Ensure that last iteration overstep contains data that will never be picked - unsigned int packed_end_block_mode = round_down_to_simd_multiple_vla(end_block_mode - 1); + size_t packed_end_block_mode = round_down_to_simd_multiple_vla(end_block_mode - 1); storea(clear_error, errors_of_best_combination + packed_end_block_mode); store_nbytes(clear_quant, best_quant_levels + packed_end_block_mode); store_nbytes(clear_quant, best_quant_levels_mod + packed_end_block_mode); @@ -1153,7 +1153,7 @@ unsigned int compute_ideal_endpoint_formats( // The block contains 1 partition if (partition_count == 1) { - for (unsigned int i = start_block_mode; i < end_block_mode; i++) + for (size_t i = start_block_mode; i < end_block_mode; i++) { if (qwt_errors[i] >= ERROR_CALC_DEFAULT) { @@ -1186,7 +1186,7 @@ unsigned int compute_ideal_endpoint_formats( best_error, format_of_choice, combined_best_error, formats_of_choice); assert(start_block_mode == 0); - for (unsigned int i = 0; i < end_block_mode; i++) + for (size_t i = 0; i < end_block_mode; i++) { if (qwt_errors[i] >= ERROR_CALC_DEFAULT) { @@ -1219,7 +1219,7 @@ unsigned int compute_ideal_endpoint_formats( best_error, format_of_choice, combined_best_error, formats_of_choice); assert(start_block_mode == 0); - for (unsigned int i = 0; i < end_block_mode; i++) + for (size_t i = 0; i < end_block_mode; i++) { if (qwt_errors[i] >= ERROR_CALC_DEFAULT) { @@ -1253,7 +1253,7 @@ unsigned int compute_ideal_endpoint_formats( best_error, format_of_choice, combined_best_error, formats_of_choice); assert(start_block_mode == 0); - for (unsigned int i = 0; i < end_block_mode; i++) + for (size_t i = 0; i < end_block_mode; i++) { if (qwt_errors[i] >= ERROR_CALC_DEFAULT) { @@ -1287,14 +1287,14 @@ unsigned int compute_ideal_endpoint_formats( } // Search the remaining results and pick the best candidate modes for trial 1+ - for (unsigned int i = 1; i < tune_candidate_limit; i++) + for (size_t i = 1; i < tune_candidate_limit; i++) { vint vbest_error_index(-1); vfloat vbest_ep_error(ERROR_CALC_DEFAULT); start_block_mode = round_down_to_simd_multiple_vla(start_block_mode); vint lane_ids = vint::lane_id() + vint(start_block_mode); - for (unsigned int j = start_block_mode; j < end_block_mode; j += ASTCENC_SIMD_WIDTH) + for (size_t j = start_block_mode; j < end_block_mode; j += ASTCENC_SIMD_WIDTH) { vfloat err = vfloat(errors_of_best_combination + j); vmask mask = 
err < vbest_ep_error; @@ -1323,7 +1323,7 @@ unsigned int compute_ideal_endpoint_formats( } } - for (unsigned int i = 0; i < tune_candidate_limit; i++) + for (size_t i = 0; i < tune_candidate_limit; i++) { if (best_error_weights[i] < 0) { diff --git a/Source/astcenc_symbolic_physical.cpp b/Source/astcenc_symbolic_physical.cpp index 45d9abb6..95d8a59d 100644 --- a/Source/astcenc_symbolic_physical.cpp +++ b/Source/astcenc_symbolic_physical.cpp @@ -111,12 +111,12 @@ void symbolic_to_physical( { // There is currently no attempt to coalesce larger void-extents static const uint8_t cbytes[8] { 0xFC, 0xFD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; - for (unsigned int i = 0; i < 8; i++) + for (size_t i = 0; i < 8; i++) { pcb[i] = cbytes[i]; } - for (unsigned int i = 0; i < BLOCK_MAX_COMPONENTS; i++) + for (size_t i = 0; i < BLOCK_MAX_COMPONENTS; i++) { pcb[2 * i + 8] = scb.constant_color[i] & 0xFF; pcb[2 * i + 9] = (scb.constant_color[i] >> 8) & 0xFF; @@ -130,12 +130,12 @@ void symbolic_to_physical( { // There is currently no attempt to coalesce larger void-extents static const uint8_t cbytes[8] { 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; - for (unsigned int i = 0; i < 8; i++) + for (size_t i = 0; i < 8; i++) { pcb[i] = cbytes[i]; } - for (unsigned int i = 0; i < BLOCK_MAX_COMPONENTS; i++) + for (size_t i = 0; i < BLOCK_MAX_COMPONENTS; i++) { pcb[2 * i + 8] = scb.constant_color[i] & 0xFF; pcb[2 * i + 9] = (scb.constant_color[i] >> 8) & 0xFF; @@ -144,7 +144,7 @@ void symbolic_to_physical( return; } - unsigned int partition_count = scb.partition_count; + size_t partition_count = scb.partition_count; // Compress the weights. // They are encoded as an ordinary integer-sequence, then bit-reversed @@ -217,7 +217,7 @@ void symbolic_to_physical( // Check endpoint types for each partition to determine the lowest class present int low_class = 4; - for (unsigned int i = 0; i < partition_count; i++) + for (size_t i = 0; i < partition_count; i++) { int class_of_format = scb.color_formats[i] >> 2; low_class = astc::min(class_of_format, low_class); @@ -231,14 +231,14 @@ void symbolic_to_physical( int encoded_type = low_class + 1; int bitpos = 2; - for (unsigned int i = 0; i < partition_count; i++) + for (size_t i = 0; i < partition_count; i++) { int classbit_of_format = (scb.color_formats[i] >> 2) - low_class; encoded_type |= classbit_of_format << bitpos; bitpos++; } - for (unsigned int i = 0; i < partition_count; i++) + for (size_t i = 0; i < partition_count; i++) { int lowbits_of_format = scb.color_formats[i] & 3; encoded_type |= lowbits_of_format << bitpos; @@ -270,7 +270,7 @@ void symbolic_to_physical( int valuecount_to_encode = 0; const uint8_t* pack_table = color_uquant_to_scrambled_pquant_tables[scb.quant_mode - QUANT_6]; - for (unsigned int i = 0; i < scb.partition_count; i++) + for (size_t i = 0; i < scb.partition_count; i++) { int vals = 2 * (scb.color_formats[i] >> 2) + 2; assert(vals <= 8); @@ -369,7 +369,7 @@ void physical_to_symbolic( return; } - unsigned int packed_index = bsd.block_mode_packed_index[block_mode]; + size_t packed_index = bsd.block_mode_packed_index[block_mode]; if (packed_index == BLOCK_BAD_BLOCK_MODE) { scb.block_type = SYM_BTYPE_ERROR; diff --git a/Source/astcenc_vecmathlib.h b/Source/astcenc_vecmathlib.h index e6ae97cc..7bb49c0c 100644 --- a/Source/astcenc_vecmathlib.h +++ b/Source/astcenc_vecmathlib.h @@ -218,9 +218,9 @@ template T gatherf_byte_inds(const float* base, const uint8_t* indic * * @return The rounded value. 
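The alignment helpers converted in astcenc_vecmathlib.h just below reduce to two idioms: round down by masking off the low bits of a power-of-two lane count, and round up via a ceiling division. A standalone sketch with an assumed width of 8 lanes; in the library the width is the compile-time ASTCENC_SIMD_WIDTH.

#include <cstddef>

constexpr size_t SIMD_WIDTH = 8;

constexpr size_t round_down_to_simd(size_t count)
{
    return count & ~(SIMD_WIDTH - 1);   // clear the low bits
}

constexpr size_t round_up_to_simd(size_t count)
{
    return ((count + SIMD_WIDTH - 1) / SIMD_WIDTH) * SIMD_WIDTH;
}

static_assert(round_down_to_simd(29) == 24, "29 rounds down to 24");
static_assert(round_up_to_simd(29) == 32,   "29 rounds up to 32");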
*/ -ASTCENC_SIMD_INLINE unsigned int round_down_to_simd_multiple_8(unsigned int count) +ASTCENC_SIMD_INLINE size_t round_down_to_simd_multiple_8(size_t count) { - return count & static_cast(~(8 - 1)); + return count & static_cast(~(8 - 1)); } /** @@ -230,9 +230,9 @@ ASTCENC_SIMD_INLINE unsigned int round_down_to_simd_multiple_8(unsigned int coun * * @return The rounded value. */ -ASTCENC_SIMD_INLINE unsigned int round_down_to_simd_multiple_4(unsigned int count) +ASTCENC_SIMD_INLINE size_t round_down_to_simd_multiple_4(size_t count) { - return count & static_cast(~(4 - 1)); + return count & static_cast(~(4 - 1)); } /** @@ -244,9 +244,9 @@ ASTCENC_SIMD_INLINE unsigned int round_down_to_simd_multiple_4(unsigned int coun * * @return The rounded value. */ -ASTCENC_SIMD_INLINE unsigned int round_down_to_simd_multiple_vla(unsigned int count) +ASTCENC_SIMD_INLINE size_t round_down_to_simd_multiple_vla(size_t count) { - return count & static_cast(~(ASTCENC_SIMD_WIDTH - 1)); + return count & static_cast(~(ASTCENC_SIMD_WIDTH - 1)); } /** @@ -258,9 +258,9 @@ ASTCENC_SIMD_INLINE unsigned int round_down_to_simd_multiple_vla(unsigned int co * * @return The rounded value. */ -ASTCENC_SIMD_INLINE unsigned int round_up_to_simd_multiple_vla(unsigned int count) +ASTCENC_SIMD_INLINE size_t round_up_to_simd_multiple_vla(size_t count) { - unsigned int multiples = (count + ASTCENC_SIMD_WIDTH - 1) / ASTCENC_SIMD_WIDTH; + size_t multiples = (count + ASTCENC_SIMD_WIDTH - 1) / ASTCENC_SIMD_WIDTH; return multiples * ASTCENC_SIMD_WIDTH; } diff --git a/Source/astcenc_vecmathlib_avx2_8.h b/Source/astcenc_vecmathlib_avx2_8.h index 4c96c7c5..9e4807f0 100644 --- a/Source/astcenc_vecmathlib_avx2_8.h +++ b/Source/astcenc_vecmathlib_avx2_8.h @@ -292,9 +292,9 @@ ASTCENC_SIMD_INLINE vmask8 operator~(vmask8 a) * * bit0 = lane 0 */ -ASTCENC_SIMD_INLINE unsigned int mask(vmask8 a) +ASTCENC_SIMD_INLINE size_t mask(vmask8 a) { - return static_cast(_mm256_movemask_ps(a.m)); + return static_cast(_mm256_movemask_ps(a.m)); } /** diff --git a/Source/astcenc_vecmathlib_neon_4.h b/Source/astcenc_vecmathlib_neon_4.h index f31063d9..6cd76dc6 100644 --- a/Source/astcenc_vecmathlib_neon_4.h +++ b/Source/astcenc_vecmathlib_neon_4.h @@ -407,7 +407,7 @@ ASTCENC_SIMD_INLINE vmask4 operator~(vmask4 a) * * bit0 = lane 0 */ -ASTCENC_SIMD_INLINE unsigned int mask(vmask4 a) +ASTCENC_SIMD_INLINE size_t mask(vmask4 a) { static const int shifta[4] { 0, 1, 2, 3 }; static const int32x4_t shift = vld1q_s32(shifta); diff --git a/Source/astcenc_vecmathlib_none_4.h b/Source/astcenc_vecmathlib_none_4.h index f25c92be..977f5ac6 100644 --- a/Source/astcenc_vecmathlib_none_4.h +++ b/Source/astcenc_vecmathlib_none_4.h @@ -410,7 +410,7 @@ ASTCENC_SIMD_INLINE vmask4 operator~(vmask4 a) * * bit0 = lane 0 */ -ASTCENC_SIMD_INLINE unsigned int mask(vmask4 a) +ASTCENC_SIMD_INLINE size_t mask(vmask4 a) { return (a.m[0] & 0x1) | (a.m[1] & 0x2) | diff --git a/Source/astcenc_vecmathlib_sse_4.h b/Source/astcenc_vecmathlib_sse_4.h index f6991e46..db1e4d21 100644 --- a/Source/astcenc_vecmathlib_sse_4.h +++ b/Source/astcenc_vecmathlib_sse_4.h @@ -423,9 +423,9 @@ ASTCENC_SIMD_INLINE vmask4 operator~(vmask4 a) * * bit0 = lane 0 */ -ASTCENC_SIMD_INLINE unsigned int mask(vmask4 a) +ASTCENC_SIMD_INLINE size_t mask(vmask4 a) { - return static_cast(_mm_movemask_ps(a.m)); + return static_cast(_mm_movemask_ps(a.m)); } /** diff --git a/Source/astcenc_vecmathlib_sve_8.h b/Source/astcenc_vecmathlib_sve_8.h index 1e98df02..e4e8a0b4 100644 --- a/Source/astcenc_vecmathlib_sve_8.h +++ 
b/Source/astcenc_vecmathlib_sve_8.h @@ -287,12 +287,12 @@ ASTCENC_SIMD_INLINE vmask8 operator~(vmask8 a) * * bit0 = lane 0 */ -ASTCENC_SIMD_INLINE unsigned int mask(vmask8 a) +ASTCENC_SIMD_INLINE size_t mask(vmask8 a) { alignas(32) const int shifta[8] { 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 }; svint32_8_t template_vals = svld1_s32(svptrue_b32(), shifta); svint32_8_t active_vals = svsel_s32(a.m, template_vals, svdup_s32(0)); - return static_cast(svaddv_s32(svptrue_b32(), active_vals)); + return static_cast(svaddv_s32(svptrue_b32(), active_vals)); } /** diff --git a/Source/astcenc_weight_align.cpp b/Source/astcenc_weight_align.cpp index b2054164..660d5c05 100644 --- a/Source/astcenc_weight_align.cpp +++ b/Source/astcenc_weight_align.cpp @@ -45,7 +45,7 @@ #include #include -static constexpr unsigned int ANGULAR_STEPS { 32 }; +static constexpr size_t ANGULAR_STEPS { 32 }; static_assert((ANGULAR_STEPS % ASTCENC_SIMD_WIDTH) == 0, "ANGULAR_STEPS must be multiple of ASTCENC_SIMD_WIDTH"); @@ -55,7 +55,7 @@ static_assert(ANGULAR_STEPS >= 32, // Store a reduced sin/cos table for 64 possible weight values; this causes // slight quality loss compared to using sin() and cos() directly. Must be 2^N. -static constexpr unsigned int SINCOS_STEPS { 64 }; +static constexpr size_t SINCOS_STEPS { 64 }; static const uint8_t steps_for_quant_level[12] { 2, 3, 4, 5, 6, 8, 10, 12, 16, 20, 24, 32 @@ -71,11 +71,11 @@ ASTCENC_ALIGNAS static float cos_table[SINCOS_STEPS][ANGULAR_STEPS]; /* See header for documentation. */ void prepare_angular_tables() { - for (unsigned int i = 0; i < ANGULAR_STEPS; i++) + for (size_t i = 0; i < ANGULAR_STEPS; i++) { float angle_step = static_cast(i + 1); - for (unsigned int j = 0; j < SINCOS_STEPS; j++) + for (size_t j = 0; j < SINCOS_STEPS; j++) { sin_table[j][i] = static_cast(sinf((2.0f * astc::PI / (SINCOS_STEPS - 1.0f)) * angle_step * static_cast(j))); cos_table[j][i] = static_cast(cosf((2.0f * astc::PI / (SINCOS_STEPS - 1.0f)) * angle_step * static_cast(j))); @@ -92,9 +92,9 @@ void prepare_angular_tables() * @param[out] offsets The output angular offsets array. */ static void compute_angular_offsets( - unsigned int weight_count, + size_t weight_count, const float* dec_weight_ideal_value, - unsigned int max_angular_steps, + size_t max_angular_steps, float* offsets ) { promise(weight_count > 0); @@ -103,7 +103,7 @@ static void compute_angular_offsets( ASTCENC_ALIGNAS int isamplev[BLOCK_MAX_WEIGHTS]; // Precompute isample; arrays are always allocated 64 elements long - for (unsigned int i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH) { // Ideal weight can be outside [0, 1] range, so clamp to fit table vfloat ideal_weight = clampzo(loada(dec_weight_ideal_value + i)); @@ -117,12 +117,12 @@ static void compute_angular_offsets( // Arrays are multiple of SIMD width (ANGULAR_STEPS), safe to overshoot max vfloat mult(1.0f / (2.0f * astc::PI)); - for (unsigned int i = 0; i < max_angular_steps; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < max_angular_steps; i += ASTCENC_SIMD_WIDTH) { vfloat anglesum_x = vfloat::zero(); vfloat anglesum_y = vfloat::zero(); - for (unsigned int j = 0; j < weight_count; j++) + for (size_t j = 0; j < weight_count; j++) { int isample = isamplev[j]; anglesum_x += loada(cos_table[isample] + i); @@ -154,10 +154,10 @@ static void compute_angular_offsets( * @param[out] cut_high_weight_error Per angular step, the high weight cut error. 
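The NEON and SVE mask() implementations above have no movemask instruction, so they select a per-lane power of two under the compare mask and horizontally add (or OR) the survivors. A scalar model of that idea for a 4-lane mask; the function name is illustrative.

#include <cstddef>

static size_t mask_from_lanes(const bool lanes[4])
{
    static const int lane_bit[4] { 0x1, 0x2, 0x4, 0x8 };

    size_t result = 0;
    for (size_t i = 0; i < 4; i++)
    {
        // Keep the lane's bit only where the compare was true
        result += lanes[i] ? lane_bit[i] : 0;
    }
    return result; // bit0 = lane 0, matching the SIMD versions
}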
*/ static void compute_lowest_and_highest_weight( - unsigned int weight_count, + size_t weight_count, const float* dec_weight_ideal_value, - unsigned int max_angular_steps, - unsigned int max_quant_steps, + size_t max_angular_steps, + size_t max_quant_steps, const float* offsets, float* lowest_weight, int* weight_span, @@ -177,7 +177,7 @@ static void compute_lowest_and_highest_weight( vfloat max_weight(-FLT_MAX); vint lane_id = vint::lane_id(); - for (unsigned int i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH) + for (size_t i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH) { vmask active = lane_id < vint(weight_count); lane_id += vint(ASTCENC_SIMD_WIDTH); @@ -191,7 +191,7 @@ static void compute_lowest_and_highest_weight( max_weight = hmax(max_weight); // Arrays are ANGULAR_STEPS long, so always safe to run full vectors - for (unsigned int sp = 0; sp < max_angular_steps; sp += ASTCENC_SIMD_WIDTH) + for (size_t sp = 0; sp < max_angular_steps; sp += ASTCENC_SIMD_WIDTH) { vfloat errval = vfloat::zero(); vfloat cut_low_weight_err = vfloat::zero(); @@ -203,7 +203,7 @@ static void compute_lowest_and_highest_weight( vfloat minidx = round(min_weight * rcp_stepsize - offset); vfloat maxidx = round(max_weight * rcp_stepsize - offset); - for (unsigned int j = 0; j < weight_count; j++) + for (size_t j = 0; j < weight_count; j++) { vfloat sval = load1(dec_weight_ideal_value + j) * rcp_stepsize - offset; vfloat svalrte = round(sval); @@ -250,14 +250,14 @@ static void compute_lowest_and_highest_weight( * @param[out] high_value Per angular step, the highest weight value. */ static void compute_angular_endpoints_for_quant_levels( - unsigned int weight_count, + size_t weight_count, const float* dec_weight_ideal_value, - unsigned int max_quant_level, + size_t max_quant_level, float low_value[TUNE_MAX_ANGULAR_QUANT + 1], float high_value[TUNE_MAX_ANGULAR_QUANT + 1] ) { - unsigned int max_quant_steps = steps_for_quant_level[max_quant_level]; - unsigned int max_angular_steps = steps_for_quant_level[max_quant_level]; + size_t max_quant_steps = steps_for_quant_level[max_quant_level]; + size_t max_angular_steps = steps_for_quant_level[max_quant_level]; ASTCENC_ALIGNAS float angular_offsets[ANGULAR_STEPS]; @@ -282,7 +282,7 @@ static void compute_angular_endpoints_for_quant_levels( // Initialize the array to some safe defaults promise(max_quant_steps > 0); - for (unsigned int i = 0; i < (max_quant_steps + 4); i++) + for (size_t i = 0; i < (max_quant_steps + 4); i++) { // Lane<0> = Best error // Lane<1> = Best scale; -1 indicates no solution found @@ -291,7 +291,7 @@ static void compute_angular_endpoints_for_quant_levels( } promise(max_angular_steps > 0); - for (unsigned int i = 0; i < max_angular_steps; i++) + for (size_t i = 0; i < max_angular_steps; i++) { float i_flt = static_cast(i); @@ -325,9 +325,9 @@ static void compute_angular_endpoints_for_quant_levels( best_results[idx_span - 2] = select(best_result, new_result, mask); } - for (unsigned int i = 0; i <= max_quant_level; i++) + for (size_t i = 0; i <= max_quant_level; i++) { - unsigned int q = steps_for_quant_level[i]; + size_t q = steps_for_quant_level[i]; int bsi = static_cast(best_results[q].lane<1>()); // Did we find anything? 
@@ -355,7 +355,7 @@ void compute_angular_endpoints_1plane( bool only_always, const block_size_descriptor& bsd, const float* dec_weight_ideal_value, - unsigned int max_weight_quant, + size_t max_weight_quant, compression_working_buffers& tmpbuf ) { float (&low_value)[WEIGHTS_MAX_BLOCK_MODES] = tmpbuf.weight_low_value1; @@ -364,10 +364,10 @@ void compute_angular_endpoints_1plane( float (&low_values)[WEIGHTS_MAX_DECIMATION_MODES][TUNE_MAX_ANGULAR_QUANT + 1] = tmpbuf.weight_low_values1; float (&high_values)[WEIGHTS_MAX_DECIMATION_MODES][TUNE_MAX_ANGULAR_QUANT + 1] = tmpbuf.weight_high_values1; - unsigned int max_decimation_modes = only_always ? bsd.decimation_mode_count_always + size_t max_decimation_modes = only_always ? bsd.decimation_mode_count_always : bsd.decimation_mode_count_selected; promise(max_decimation_modes > 0); - for (unsigned int i = 0; i < max_decimation_modes; i++) + for (size_t i = 0; i < max_decimation_modes; i++) { const decimation_mode& dm = bsd.decimation_modes[i]; if (!dm.is_ref_1plane(static_cast(max_weight_quant))) @@ -375,9 +375,9 @@ void compute_angular_endpoints_1plane( continue; } - unsigned int weight_count = bsd.get_decimation_info(i).weight_count; + size_t weight_count = bsd.get_decimation_info(i).weight_count; - unsigned int max_precision = dm.maxprec_1plane; + size_t max_precision = dm.maxprec_1plane; if (max_precision > TUNE_MAX_ANGULAR_QUANT) { max_precision = TUNE_MAX_ANGULAR_QUANT; @@ -394,16 +394,16 @@ void compute_angular_endpoints_1plane( max_precision, low_values[i], high_values[i]); } - unsigned int max_block_modes = only_always ? bsd.block_mode_count_1plane_always + size_t max_block_modes = only_always ? bsd.block_mode_count_1plane_always : bsd.block_mode_count_1plane_selected; promise(max_block_modes > 0); - for (unsigned int i = 0; i < max_block_modes; i++) + for (size_t i = 0; i < max_block_modes; i++) { const block_mode& bm = bsd.block_modes[i]; assert(!bm.is_dual_plane); - unsigned int quant_mode = bm.quant_mode; - unsigned int decim_mode = bm.decimation_mode; + size_t quant_mode = bm.quant_mode; + size_t decim_mode = bm.decimation_mode; if (quant_mode <= TUNE_MAX_ANGULAR_QUANT) { @@ -422,7 +422,7 @@ void compute_angular_endpoints_1plane( void compute_angular_endpoints_2planes( const block_size_descriptor& bsd, const float* dec_weight_ideal_value, - unsigned int max_weight_quant, + size_t max_weight_quant, compression_working_buffers& tmpbuf ) { float (&low_value1)[WEIGHTS_MAX_BLOCK_MODES] = tmpbuf.weight_low_value1; @@ -436,7 +436,7 @@ void compute_angular_endpoints_2planes( float (&high_values2)[WEIGHTS_MAX_DECIMATION_MODES][TUNE_MAX_ANGULAR_QUANT + 1] = tmpbuf.weight_high_values2; promise(bsd.decimation_mode_count_selected > 0); - for (unsigned int i = 0; i < bsd.decimation_mode_count_selected; i++) + for (size_t i = 0; i < bsd.decimation_mode_count_selected; i++) { const decimation_mode& dm = bsd.decimation_modes[i]; if (!dm.is_ref_2plane(static_cast(max_weight_quant))) @@ -444,9 +444,9 @@ void compute_angular_endpoints_2planes( continue; } - unsigned int weight_count = bsd.get_decimation_info(i).weight_count; + size_t weight_count = bsd.get_decimation_info(i).weight_count; - unsigned int max_precision = dm.maxprec_2planes; + size_t max_precision = dm.maxprec_2planes; if (max_precision > TUNE_MAX_ANGULAR_QUANT) { max_precision = TUNE_MAX_ANGULAR_QUANT; @@ -468,13 +468,13 @@ void compute_angular_endpoints_2planes( max_precision, low_values2[i], high_values2[i]); } - unsigned int start = bsd.block_mode_count_1plane_selected; - unsigned int 
end = bsd.block_mode_count_1plane_2plane_selected; - for (unsigned int i = start; i < end; i++) + size_t start = bsd.block_mode_count_1plane_selected; + size_t end = bsd.block_mode_count_1plane_2plane_selected; + for (size_t i = start; i < end; i++) { const block_mode& bm = bsd.block_modes[i]; - unsigned int quant_mode = bm.quant_mode; - unsigned int decim_mode = bm.decimation_mode; + size_t quant_mode = bm.quant_mode; + size_t decim_mode = bm.decimation_mode; if (quant_mode <= TUNE_MAX_ANGULAR_QUANT) { diff --git a/Source/astcenccli_error_metrics.cpp b/Source/astcenccli_error_metrics.cpp index 4e01a9ee..82eecc20 100644 --- a/Source/astcenccli_error_metrics.cpp +++ b/Source/astcenccli_error_metrics.cpp @@ -126,30 +126,30 @@ void compute_error_metrics( double mean_angular_errorsum = 0.0; double worst_angular_errorsum = 0.0; - unsigned int dim_x = astc::min(img1->dim_x, img2->dim_x); - unsigned int dim_y = astc::min(img1->dim_y, img2->dim_y); - unsigned int dim_z = astc::min(img1->dim_z, img2->dim_z); + size_t dim_x = astc::min(img1->dim_x, img2->dim_x); + size_t dim_y = astc::min(img1->dim_y, img2->dim_y); + size_t dim_z = astc::min(img1->dim_z, img2->dim_z); if (img1->dim_x != img2->dim_x || img1->dim_y != img2->dim_y || img1->dim_z != img2->dim_z) { printf("WARNING: Only intersection of images will be compared:\n" - " Image 1: %dx%dx%d\n" - " Image 2: %dx%dx%d\n", + " Image 1: %zux%zux%zu\n" + " Image 2: %zux%zux%zu\n", img1->dim_x, img1->dim_y, img1->dim_z, img2->dim_x, img2->dim_y, img2->dim_z); } double rgb_peak = 0.0; - unsigned int xsize1 = img1->dim_x; - unsigned int xsize2 = img2->dim_x; + size_t xsize1 = img1->dim_x; + size_t xsize2 = img2->dim_x; - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { vfloat4 color1; vfloat4 color2; diff --git a/Source/astcenccli_image.cpp b/Source/astcenccli_image.cpp index 237da60c..cbb78d80 100644 --- a/Source/astcenccli_image.cpp +++ b/Source/astcenccli_image.cpp @@ -26,10 +26,10 @@ /* See header for documentation. */ astcenc_image *alloc_image( - unsigned int bitness, - unsigned int dim_x, - unsigned int dim_y, - unsigned int dim_z + size_t bitness, + size_t dim_x, + size_t dim_y, + size_t dim_z ) { astcenc_image *img = new astcenc_image; img->dim_x = dim_x; @@ -42,7 +42,7 @@ astcenc_image *alloc_image( if (bitness == 8) { img->data_type = ASTCENC_TYPE_U8; - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { data[z] = new uint8_t[dim_x * dim_y * 4]; } @@ -50,7 +50,7 @@ astcenc_image *alloc_image( else if (bitness == 16) { img->data_type = ASTCENC_TYPE_F16; - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { data[z] = new uint16_t[dim_x * dim_y * 4]; } @@ -59,7 +59,7 @@ astcenc_image *alloc_image( { assert(bitness == 32); img->data_type = ASTCENC_TYPE_F32; - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { data[z] = new float[dim_x * dim_y * 4]; } @@ -76,7 +76,7 @@ void free_image(astcenc_image * img) return; } - for (unsigned int z = 0; z < img->dim_z; z++) + for (size_t z = 0; z < img->dim_z; z++) { delete[] reinterpret_cast(img->data[z]); } @@ -88,9 +88,9 @@ void free_image(astcenc_image * img) /* See header for documentation. 
*/ int determine_image_components(const astcenc_image * img) { - unsigned int dim_x = img->dim_x; - unsigned int dim_y = img->dim_y; - unsigned int dim_z = img->dim_z; + size_t dim_x = img->dim_x; + size_t dim_y = img->dim_y; + size_t dim_z = img->dim_z; // Scan through the image data to determine how many color components the image has bool is_luma = true; @@ -98,13 +98,13 @@ int determine_image_components(const astcenc_image * img) if (img->data_type == ASTCENC_TYPE_U8) { - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { uint8_t* data8 = static_cast(img->data[z]); - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { int r = data8[(4 * dim_x * y) + (4 * x )]; int g = data8[(4 * dim_x * y) + (4 * x + 1)]; @@ -119,13 +119,13 @@ int determine_image_components(const astcenc_image * img) } else if (img->data_type == ASTCENC_TYPE_F16) { - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { uint16_t* data16 = static_cast(img->data[z]); - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { int r = data16[(4 * dim_x * y) + (4 * x )]; int g = data16[(4 * dim_x * y) + (4 * x + 1)]; @@ -143,13 +143,13 @@ int determine_image_components(const astcenc_image * img) { assert(img->data_type == ASTCENC_TYPE_F32); - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { float* data32 = static_cast(img->data[z]); - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { float r = data32[(4 * dim_x * y) + (4 * x )]; float g = data32[(4 * dim_x * y) + (4 * x + 1)]; @@ -170,19 +170,19 @@ int determine_image_components(const astcenc_image * img) /* See header for documentation. */ astcenc_image* astc_img_from_floatx4_array( const float* data, - unsigned int dim_x, - unsigned int dim_y, + size_t dim_x, + size_t dim_y, bool y_flip ) { astcenc_image* img = alloc_image(16, dim_x, dim_y, 1); - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { uint16_t* data16 = static_cast(img->data[0]); - unsigned int y_src = y_flip ? (dim_y - y - 1) : y; + size_t y_src = y_flip ? (dim_y - y - 1) : y; const float* src = data + 4 * dim_x * y_src; - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { vint4 colorf16 = float_to_float16(vfloat4( src[4 * x ], @@ -204,19 +204,19 @@ astcenc_image* astc_img_from_floatx4_array( /* See header for documentation. */ astcenc_image* astc_img_from_unorm8x4_array( const uint8_t* data, - unsigned int dim_x, - unsigned int dim_y, + size_t dim_x, + size_t dim_y, bool y_flip ) { astcenc_image* img = alloc_image(8, dim_x, dim_y, 1); - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { uint8_t* data8 = static_cast(img->data[0]); - unsigned int y_src = y_flip ? (dim_y - y - 1) : y; + size_t y_src = y_flip ? 
(dim_y - y - 1) : y; const uint8_t* src = data + 4 * dim_x * y_src; - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { data8[(4 * dim_x * y) + (4 * x )] = src[4 * x ]; data8[(4 * dim_x * y) + (4 * x + 1)] = src[4 * x + 1]; @@ -234,10 +234,10 @@ astcenc_image* astc_img_from_unorm8x4_array( float* floatx4_array_from_astc_img( const astcenc_image* img, bool y_flip, - unsigned int z_index + size_t z_index ) { - unsigned int dim_x = img->dim_x; - unsigned int dim_y = img->dim_y; + size_t dim_x = img->dim_x; + size_t dim_y = img->dim_y; float *buf = new float[4 * dim_x * dim_y]; assert(z_index < img->dim_z); @@ -245,12 +245,12 @@ float* floatx4_array_from_astc_img( if (img->data_type == ASTCENC_TYPE_U8) { uint8_t* data8 = static_cast(img->data[z_index]); - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { - unsigned int ymod = y_flip ? dim_y - y - 1 : y; + size_t ymod = y_flip ? dim_y - y - 1 : y; float* dst = buf + y * dim_x * 4; - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { dst[4 * x ] = data8[(4 * dim_x * ymod) + (4 * x )] * (1.0f / 255.0f); dst[4 * x + 1] = data8[(4 * dim_x * ymod) + (4 * x + 1)] * (1.0f / 255.0f); @@ -262,12 +262,12 @@ float* floatx4_array_from_astc_img( else if (img->data_type == ASTCENC_TYPE_F16) { uint16_t* data16 = static_cast(img->data[z_index]); - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { - unsigned int ymod = y_flip ? dim_y - y - 1 : y; + size_t ymod = y_flip ? dim_y - y - 1 : y; float *dst = buf + y * dim_x * 4; - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { vint4 colori( data16[(4 * dim_x * ymod) + (4 * x )], @@ -285,12 +285,12 @@ float* floatx4_array_from_astc_img( { assert(img->data_type == ASTCENC_TYPE_F32); float* data32 = static_cast(img->data[z_index]); - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { - unsigned int ymod = y_flip ? dim_y - y - 1 : y; + size_t ymod = y_flip ? dim_y - y - 1 : y; float *dst = buf + y * dim_x * 4; - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { dst[4 * x ] = data32[(4 * dim_x * ymod) + (4 * x )]; dst[4 * x + 1] = data32[(4 * dim_x * ymod) + (4 * x + 1)]; @@ -308,19 +308,19 @@ uint8_t* unorm8x4_array_from_astc_img( const astcenc_image* img, bool y_flip ) { - unsigned int dim_x = img->dim_x; - unsigned int dim_y = img->dim_y; + size_t dim_x = img->dim_x; + size_t dim_y = img->dim_y; uint8_t* buf = new uint8_t[4 * dim_x * dim_y]; if (img->data_type == ASTCENC_TYPE_U8) { uint8_t* data8 = static_cast(img->data[0]); - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { - unsigned int ymod = y_flip ? dim_y - y - 1 : y; + size_t ymod = y_flip ? dim_y - y - 1 : y; uint8_t* dst = buf + y * dim_x * 4; - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { dst[4 * x ] = data8[(4 * dim_x * ymod) + (4 * x )]; dst[4 * x + 1] = data8[(4 * dim_x * ymod) + (4 * x + 1)]; @@ -332,12 +332,12 @@ uint8_t* unorm8x4_array_from_astc_img( else if (img->data_type == ASTCENC_TYPE_F16) { uint16_t* data16 = static_cast(img->data[0]); - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { - unsigned int ymod = y_flip ? dim_y - y - 1 : y; + size_t ymod = y_flip ? 
dim_y - y - 1 : y; uint8_t* dst = buf + y * dim_x * 4; - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { vint4 colori( data16[(4 * dim_x * ymod) + (4 * x )], @@ -358,12 +358,12 @@ uint8_t* unorm8x4_array_from_astc_img( { assert(img->data_type == ASTCENC_TYPE_F32); float* data32 = static_cast(img->data[0]); - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { - unsigned int ymod = y_flip ? dim_y - y - 1 : y; + size_t ymod = y_flip ? dim_y - y - 1 : y; uint8_t* dst = buf + y * dim_x * 4; - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { dst[4 * x ] = static_cast(astc::flt2int_rtn(astc::clamp1f(data32[(4 * dim_x * ymod) + (4 * x )]) * 255.0f)); dst[4 * x + 1] = static_cast(astc::flt2int_rtn(astc::clamp1f(data32[(4 * dim_x * ymod) + (4 * x + 1)]) * 255.0f)); diff --git a/Source/astcenccli_image_external.cpp b/Source/astcenccli_image_external.cpp index e41b9abf..3d548071 100644 --- a/Source/astcenccli_image_external.cpp +++ b/Source/astcenccli_image_external.cpp @@ -84,7 +84,7 @@ astcenc_image* load_png_with_wuffs( const char* filename, bool y_flip, bool& is_hdr, - unsigned int& component_count + size_t& component_count ) { is_hdr = false; component_count = 4; diff --git a/Source/astcenccli_image_load_store.cpp b/Source/astcenccli_image_load_store.cpp index 41ac4de9..af76450b 100644 --- a/Source/astcenccli_image_load_store.cpp +++ b/Source/astcenccli_image_load_store.cpp @@ -46,7 +46,7 @@ static std::string get_output_filename( const astcenc_image* img, const char* filename, - unsigned int index + size_t index ) { if (img->dim_z <= 1) { @@ -83,7 +83,7 @@ static astcenc_image* load_image_with_tinyexr( const char* filename, bool y_flip, bool& is_hdr, - unsigned int& component_count + size_t& component_count ) { int dim_x, dim_y; float* image; @@ -119,7 +119,7 @@ static astcenc_image* load_image_with_stb( const char* filename, bool y_flip, bool& is_hdr, - unsigned int& component_count + size_t& component_count ) { int dim_x, dim_y; @@ -168,7 +168,7 @@ static bool store_exr_image_with_tinyexr( ) { int res { 0 }; - for (unsigned int i = 0; i < img->dim_z; i++) + for (size_t i = 0; i < img->dim_z; i++) { std::string fnmod = get_output_filename(img, filename, i); float* buf = floatx4_array_from_astc_img(img, y_flip, i); @@ -202,7 +202,7 @@ static bool store_png_image_with_stb( assert(img->data_type == ASTCENC_TYPE_U8); - for (unsigned int i = 0; i < img->dim_z; i++) + for (size_t i = 0; i < img->dim_z; i++) { std::string fnmod = get_output_filename(img, filename, i); uint8_t* buf = reinterpret_cast(img->data[i]); @@ -236,7 +236,7 @@ static bool store_tga_image_with_stb( assert(img->data_type == ASTCENC_TYPE_U8); - for (unsigned int i = 0; i < img->dim_z; i++) + for (size_t i = 0; i < img->dim_z; i++) { std::string fnmod = get_output_filename(img, filename, i); uint8_t* buf = reinterpret_cast(img->data[i]); @@ -270,7 +270,7 @@ static bool store_bmp_image_with_stb( assert(img->data_type == ASTCENC_TYPE_U8); - for (unsigned int i = 0; i < img->dim_z; i++) + for (size_t i = 0; i < img->dim_z; i++) { std::string fnmod = get_output_filename(img, filename, i); uint8_t* buf = reinterpret_cast(img->data[i]); @@ -302,7 +302,7 @@ static bool store_hdr_image_with_stb( ) { int res { 0 }; - for (unsigned int i = 0; i < img->dim_z; i++) + for (size_t i = 0; i < img->dim_z; i++) { std::string fnmod = get_output_filename(img, filename, i); float* buf = floatx4_array_from_astc_img(img, y_flip, i); @@ -787,11 +787,11 @@ static 
uint32_t u32_byterev(uint32_t v) struct format_entry { - unsigned int x; - unsigned int y; - unsigned int z; + size_t x; + size_t y; + size_t z; bool is_srgb; - unsigned int format; + size_t format; }; static const std::array ASTC_FORMATS = @@ -851,7 +851,7 @@ static const std::array ASTC_FORMATS = }}; static const format_entry* get_format( - unsigned int format + size_t format ) { for (auto& it : ASTC_FORMATS) { @@ -863,10 +863,10 @@ static const format_entry* get_format( return nullptr; } -static unsigned int get_format( - unsigned int x, - unsigned int y, - unsigned int z, +static size_t get_format( + size_t x, + size_t y, + size_t z, bool is_srgb ) { for (auto& it : ASTC_FORMATS) @@ -935,7 +935,7 @@ static astcenc_image* load_ktx_uncompressed_image( const char* filename, bool y_flip, bool& is_hdr, - unsigned int& component_count + size_t& component_count ) { FILE *f = fopen(filename, "rb"); if (!f) @@ -1173,9 +1173,9 @@ static astcenc_image* load_ktx_uncompressed_image( } - unsigned int dim_x = hdr.pixel_width; - unsigned int dim_y = astc::max(hdr.pixel_height, 1u); - unsigned int dim_z = astc::max(hdr.pixel_depth, 1u); + size_t dim_x = hdr.pixel_width; + size_t dim_y = astc::max(hdr.pixel_height, 1u); + size_t dim_z = astc::max(hdr.pixel_depth, 1u); // ignore the key/value data fseek(f, hdr.bytes_of_key_value_data, SEEK_CUR); @@ -1232,12 +1232,12 @@ static astcenc_image* load_ktx_uncompressed_image( // Transfer data from the surface to our own image data structure astcenc_image *astc_img = alloc_image(bitness, dim_x, dim_y, dim_z); - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { - unsigned int ymod = y_flip ? dim_y - y - 1 : y; - unsigned int ydst = ymod; + size_t ymod = y_flip ? dim_y - y - 1 : y; + size_t ydst = ymod; void *dst; if (astc_img->data_type == ASTCENC_TYPE_U8) @@ -1335,7 +1335,7 @@ bool load_ktx_compressed_image( } // Read the length of the data and endianess convert - unsigned int data_len; + size_t data_len; actual = fread(&data_len, 1, sizeof(data_len), f); if (actual != sizeof(data_len)) { @@ -1391,7 +1391,7 @@ bool store_ktx_compressed_image( const char* filename, bool is_srgb ) { - unsigned int fmt = get_format(img.block_x, img.block_y, img.block_z, is_srgb); + size_t fmt = get_format(img.block_x, img.block_y, img.block_z, is_srgb); ktx_header hdr; memcpy(hdr.magic, ktx_magic, 12); @@ -1445,9 +1445,9 @@ static bool store_ktx_uncompressed_image( const char* filename, int y_flip ) { - unsigned int dim_x = img->dim_x; - unsigned int dim_y = img->dim_y; - unsigned int dim_z = img->dim_z; + size_t dim_x = img->dim_x; + size_t dim_y = img->dim_y; + size_t dim_z = img->dim_z; int bitness = img->data_type == ASTCENC_TYPE_U8 ? 
8 : 16; int image_components = determine_image_components(img); @@ -1497,43 +1497,43 @@ static bool store_ktx_uncompressed_image( row_pointers8[0] = new uint8_t *[dim_y * dim_z]; row_pointers8[0][0] = new uint8_t[dim_x * dim_y * dim_z * image_components + 3]; - for (unsigned int z = 1; z < dim_z; z++) + for (size_t z = 1; z < dim_z; z++) { row_pointers8[z] = row_pointers8[0] + dim_y * z; row_pointers8[z][0] = row_pointers8[0][0] + dim_y * dim_x * image_components * z; } - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { - for (unsigned int y = 1; y < dim_y; y++) + for (size_t y = 1; y < dim_y; y++) { row_pointers8[z][y] = row_pointers8[z][0] + dim_x * image_components * y; } } - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { uint8_t* data8 = static_cast(img->data[z]); - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { int ym = y_flip ? dim_y - y - 1 : y; switch (image_components) { case 1: // single-component, treated as Luminance - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { row_pointers8[z][y][x] = data8[(4 * dim_x * ym) + (4 * x )]; } break; case 2: // two-component, treated as Luminance-Alpha - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { row_pointers8[z][y][2 * x ] = data8[(4 * dim_x * ym) + (4 * x )]; row_pointers8[z][y][2 * x + 1] = data8[(4 * dim_x * ym) + (4 * x + 3)]; } break; case 3: // three-component, treated a - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { row_pointers8[z][y][3 * x ] = data8[(4 * dim_x * ym) + (4 * x )]; row_pointers8[z][y][3 * x + 1] = data8[(4 * dim_x * ym) + (4 * x + 1)]; @@ -1541,7 +1541,7 @@ static bool store_ktx_uncompressed_image( } break; case 4: // four-component, treated as RGBA - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { row_pointers8[z][y][4 * x ] = data8[(4 * dim_x * ym) + (4 * x )]; row_pointers8[z][y][4 * x + 1] = data8[(4 * dim_x * ym) + (4 * x + 1)]; @@ -1559,43 +1559,43 @@ static bool store_ktx_uncompressed_image( row_pointers16[0] = new uint16_t *[dim_y * dim_z]; row_pointers16[0][0] = new uint16_t[dim_x * dim_y * dim_z * image_components + 1]; - for (unsigned int z = 1; z < dim_z; z++) + for (size_t z = 1; z < dim_z; z++) { row_pointers16[z] = row_pointers16[0] + dim_y * z; row_pointers16[z][0] = row_pointers16[0][0] + dim_y * dim_x * image_components * z; } - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { - for (unsigned int y = 1; y < dim_y; y++) + for (size_t y = 1; y < dim_y; y++) { row_pointers16[z][y] = row_pointers16[z][0] + dim_x * image_components * y; } } - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { uint16_t* data16 = static_cast(img->data[z]); - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { int ym = y_flip ? 
dim_y - y - 1 : y; switch (image_components) { case 1: // single-component, treated as Luminance - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { row_pointers16[z][y][x] = data16[(4 * dim_x * ym) + (4 * x )]; } break; case 2: // two-component, treated as Luminance-Alpha - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { row_pointers16[z][y][2 * x ] = data16[(4 * dim_x * ym) + (4 * x )]; row_pointers16[z][y][2 * x + 1] = data16[(4 * dim_x * ym) + (4 * x + 3)]; } break; case 3: // three-component, treated as RGB - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { row_pointers16[z][y][3 * x ] = data16[(4 * dim_x * ym) + (4 * x )]; row_pointers16[z][y][3 * x + 1] = data16[(4 * dim_x * ym) + (4 * x + 1)]; @@ -1603,7 +1603,7 @@ static bool store_ktx_uncompressed_image( } break; case 4: // four-component, treated as RGBA - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { row_pointers16[z][y][4 * x ] = data16[(4 * dim_x * ym) + (4 * x )]; row_pointers16[z][y][4 * x + 1] = data16[(4 * dim_x * ym) + (4 * x + 1)]; @@ -1760,7 +1760,7 @@ static astcenc_image* load_dds_uncompressed_image( const char* filename, bool y_flip, bool& is_hdr, - unsigned int& component_count + size_t& component_count ) { FILE *f = fopen(filename, "rb"); if (!f) @@ -1817,9 +1817,9 @@ static astcenc_image* load_dds_uncompressed_image( } } - unsigned int dim_x = hdr.width; - unsigned int dim_y = hdr.height; - unsigned int dim_z = (hdr.flags & 0x800000) ? hdr.depth : 1; + size_t dim_x = hdr.width; + size_t dim_y = hdr.height; + size_t dim_z = (hdr.flags & 0x800000) ? hdr.depth : 1; // The bitcount that we will use internally in the codec int bitness = 0; @@ -2007,12 +2007,12 @@ static astcenc_image* load_dds_uncompressed_image( // then transfer data from the surface to our own image-data-structure. astcenc_image *astc_img = alloc_image(bitness, dim_x, dim_y, dim_z); - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { - unsigned int ymod = y_flip ? dim_y - y - 1 : y; - unsigned int ydst = ymod; + size_t ymod = y_flip ? dim_y - y - 1 : y; + size_t ydst = ymod; void* dst; if (astc_img->data_type == ASTCENC_TYPE_U8) @@ -2052,9 +2052,9 @@ static bool store_dds_uncompressed_image( const char* filename, int y_flip ) { - unsigned int dim_x = img->dim_x; - unsigned int dim_y = img->dim_y; - unsigned int dim_z = img->dim_z; + size_t dim_x = img->dim_x; + size_t dim_y = img->dim_y; + size_t dim_z = img->dim_z; int bitness = img->data_type == ASTCENC_TYPE_U8 ? 8 : 16; int image_components = (bitness == 16) ? 
4 : determine_image_components(img); @@ -2089,7 +2089,7 @@ static bool store_dds_uncompressed_image( hdr.pitch_or_linear_size = image_components * (bitness / 8) * dim_x; hdr.depth = dim_z; hdr.mipmapcount = 1; - for (unsigned int i = 0; i < 11; i++) + for (size_t i = 0; i < 11; i++) { hdr.reserved1[i] = 0; } @@ -2126,44 +2126,44 @@ static bool store_dds_uncompressed_image( row_pointers8[0] = new uint8_t *[dim_y * dim_z]; row_pointers8[0][0] = new uint8_t[dim_x * dim_y * dim_z * image_components]; - for (unsigned int z = 1; z < dim_z; z++) + for (size_t z = 1; z < dim_z; z++) { row_pointers8[z] = row_pointers8[0] + dim_y * z; row_pointers8[z][0] = row_pointers8[0][0] + dim_y * dim_z * image_components * z; } - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { - for (unsigned int y = 1; y < dim_y; y++) + for (size_t y = 1; y < dim_y; y++) { row_pointers8[z][y] = row_pointers8[z][0] + dim_x * image_components * y; } } - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { uint8_t* data8 = static_cast(img->data[z]); - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { int ym = y_flip ? dim_y - y - 1 : y; switch (image_components) { case 1: // single-component, treated as Luminance - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { row_pointers8[z][y][x] = data8[(4 * dim_x * ym) + (4 * x )]; } break; case 2: // two-component, treated as Luminance-Alpha - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { row_pointers8[z][y][2 * x ] = data8[(4 * dim_x * ym) + (4 * x )]; row_pointers8[z][y][2 * x + 1] = data8[(4 * dim_x * ym) + (4 * x + 3)]; } break; case 3: // three-component, treated as RGB - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { row_pointers8[z][y][3 * x ] = data8[(4 * dim_x * ym) + (4 * x )]; row_pointers8[z][y][3 * x + 1] = data8[(4 * dim_x * ym) + (4 * x + 1)]; @@ -2171,7 +2171,7 @@ static bool store_dds_uncompressed_image( } break; case 4: // four-component, treated as RGBA - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { row_pointers8[z][y][4 * x ] = data8[(4 * dim_x * ym) + (4 * x )]; row_pointers8[z][y][4 * x + 1] = data8[(4 * dim_x * ym) + (4 * x + 1)]; @@ -2189,44 +2189,44 @@ static bool store_dds_uncompressed_image( row_pointers16[0] = new uint16_t *[dim_y * dim_z]; row_pointers16[0][0] = new uint16_t[dim_x * dim_y * dim_z * image_components]; - for (unsigned int z = 1; z < dim_z; z++) + for (size_t z = 1; z < dim_z; z++) { row_pointers16[z] = row_pointers16[0] + dim_y * z; row_pointers16[z][0] = row_pointers16[0][0] + dim_y * dim_x * image_components * z; } - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { - for (unsigned int y = 1; y < dim_y; y++) + for (size_t y = 1; y < dim_y; y++) { row_pointers16[z][y] = row_pointers16[z][0] + dim_x * image_components * y; } } - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { uint16_t* data16 = static_cast(img->data[z]); - for (unsigned int y = 0; y < dim_y; y++) + for (size_t y = 0; y < dim_y; y++) { int ym = y_flip ? 
dim_y - y - 1: y; switch (image_components) { case 1: // single-component, treated as Luminance - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { row_pointers16[z][y][x] = data16[(4 * dim_x * ym) + (4 * x )]; } break; case 2: // two-component, treated as Luminance-Alpha - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { row_pointers16[z][y][2 * x ] = data16[(4 * dim_x * ym) + (4 * x )]; row_pointers16[z][y][2 * x + 1] = data16[(4 * dim_x * ym) + (4 * x + 3)]; } break; case 3: // three-component, treated as RGB - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { row_pointers16[z][y][3 * x ] = data16[(4 * dim_x * ym) + (4 * x )]; row_pointers16[z][y][3 * x + 1] = data16[(4 * dim_x * ym) + (4 * x + 1)]; @@ -2234,7 +2234,7 @@ static bool store_dds_uncompressed_image( } break; case 4: // four-component, treated as RGBA - for (unsigned int x = 0; x < dim_x; x++) + for (size_t x = 0; x < dim_x; x++) { row_pointers16[z][y][4 * x ] = data16[(4 * dim_x * ym) + (4 * x )]; row_pointers16[z][y][4 * x + 1] = data16[(4 * dim_x * ym) + (4 * x + 1)]; @@ -2311,7 +2311,7 @@ static const struct { const char* ending1; const char* ending2; - astcenc_image* (*loader_func)(const char*, bool, bool&, unsigned int&); + astcenc_image* (*loader_func)(const char*, bool, bool&, size_t&); } loader_descs[] { // LDR formats {".png", ".PNG", load_png_with_wuffs}, @@ -2377,7 +2377,7 @@ astcenc_image* load_ncimage( const char* filename, bool y_flip, bool& is_hdr, - unsigned int& component_count + size_t& component_count ) { // Get the file extension const char* eptr = strrchr(filename, '.'); @@ -2387,7 +2387,7 @@ astcenc_image* load_ncimage( } // Scan through descriptors until a matching loader is found - for (unsigned int i = 0; i < loader_descr_count; i++) + for (size_t i = 0; i < loader_descr_count; i++) { if (loader_descs[i].ending1 == nullptr || strcmp(eptr, loader_descs[i].ending1) == 0 @@ -2443,16 +2443,16 @@ struct astc_header static const uint32_t ASTC_MAGIC_ID = 0x5CA1AB13; -static unsigned int unpack_bytes( +static size_t unpack_bytes( uint8_t a, uint8_t b, uint8_t c, uint8_t d ) { - return (static_cast(a) ) + - (static_cast(b) << 8) + - (static_cast(c) << 16) + - (static_cast(d) << 24); + return (static_cast(a) ) + + (static_cast(b) << 8) + + (static_cast(c) << 16) + + (static_cast(d) << 24); } /* See header for documentation. 
*/ @@ -2475,7 +2475,7 @@ int load_cimage( return 1; } - unsigned int magicval = unpack_bytes(hdr.magic[0], hdr.magic[1], hdr.magic[2], hdr.magic[3]); + size_t magicval = unpack_bytes(hdr.magic[0], hdr.magic[1], hdr.magic[2], hdr.magic[3]); if (magicval != ASTC_MAGIC_ID) { print_error("ERROR: File not recognized '%s'\n", filename); @@ -2483,13 +2483,13 @@ int load_cimage( } // Ensure these are not zero to avoid div by zero - unsigned int block_x = astc::max(static_cast(hdr.block_x), 1u); - unsigned int block_y = astc::max(static_cast(hdr.block_y), 1u); - unsigned int block_z = astc::max(static_cast(hdr.block_z), 1u); + size_t block_x = astc::max(static_cast(hdr.block_x), 1_z); + size_t block_y = astc::max(static_cast(hdr.block_y), 1_z); + size_t block_z = astc::max(static_cast(hdr.block_z), 1_z); - unsigned int dim_x = unpack_bytes(hdr.dim_x[0], hdr.dim_x[1], hdr.dim_x[2], 0); - unsigned int dim_y = unpack_bytes(hdr.dim_y[0], hdr.dim_y[1], hdr.dim_y[2], 0); - unsigned int dim_z = unpack_bytes(hdr.dim_z[0], hdr.dim_z[1], hdr.dim_z[2], 0); + size_t dim_x = unpack_bytes(hdr.dim_x[0], hdr.dim_x[1], hdr.dim_x[2], 0); + size_t dim_y = unpack_bytes(hdr.dim_y[0], hdr.dim_y[1], hdr.dim_y[2], 0); + size_t dim_z = unpack_bytes(hdr.dim_z[0], hdr.dim_z[1], hdr.dim_z[2], 0); if (dim_x == 0 || dim_y == 0 || dim_z == 0) { @@ -2497,9 +2497,9 @@ int load_cimage( return 1; } - unsigned int xblocks = (dim_x + block_x - 1) / block_x; - unsigned int yblocks = (dim_y + block_y - 1) / block_y; - unsigned int zblocks = (dim_z + block_z - 1) / block_z; + size_t xblocks = (dim_x + block_x - 1) / block_x; + size_t yblocks = (dim_y + block_y - 1) / block_y; + size_t zblocks = (dim_z + block_z - 1) / block_z; size_t data_size = xblocks * yblocks * zblocks * 16; uint8_t *buffer = new uint8_t[data_size]; diff --git a/Source/astcenccli_internal.h b/Source/astcenccli_internal.h index ea3ae384..0043d27d 100644 --- a/Source/astcenccli_internal.h +++ b/Source/astcenccli_internal.h @@ -36,22 +36,22 @@ struct astc_compressed_image { /** @brief The block width in texels. */ - unsigned int block_x; + size_t block_x; /** @brief The block height in texels. */ - unsigned int block_y; + size_t block_y; /** @brief The block depth in texels. */ - unsigned int block_z; + size_t block_z; /** @brief The image width in texels. */ - unsigned int dim_x; + size_t dim_x; /** @brief The image height in texels. */ - unsigned int dim_y; + size_t dim_y; /** @brief The image depth in texels. */ - unsigned int dim_z; + size_t dim_z; /** @brief The binary data payload. */ uint8_t* data; @@ -66,13 +66,13 @@ struct astc_compressed_image struct cli_config_options { /** @brief The number of threads to use for processing. */ - unsigned int thread_count; + size_t thread_count; /** @brief The number of repeats to execute for benchmarking. */ - unsigned int repeat_count; + size_t repeat_count; /** @brief The number of image slices to load for a 3D image. */ - unsigned int array_size; + size_t array_size; /** @brief @c true if running in silent mode with minimal output. */ bool silentmode; @@ -130,7 +130,7 @@ astcenc_image* load_ncimage( const char* filename, bool y_flip, bool& is_hdr, - unsigned int& component_count); + size_t& component_count); /** * @brief Load uncompressed PNG image. @@ -146,7 +146,7 @@ astcenc_image* load_png_with_wuffs( const char* filename, bool y_flip, bool& is_hdr, - unsigned int& component_count); + size_t& component_count); /** * @brief Save an uncompressed image. 
@@ -189,10 +189,10 @@ int get_output_filename_enforced_bitness( * @return The allocated image, or @c nullptr on error. */ astcenc_image* alloc_image( - unsigned int bitness, - unsigned int dim_x, - unsigned int dim_y, - unsigned int dim_z); + size_t bitness, + size_t dim_x, + size_t dim_y, + size_t dim_z); /** * @brief Free an image. @@ -276,8 +276,8 @@ bool store_ktx_compressed_image( */ astcenc_image* astc_img_from_floatx4_array( const float* data, - unsigned int dim_x, - unsigned int dim_y, + size_t dim_x, + size_t dim_y, bool y_flip); /** @@ -292,8 +292,8 @@ astcenc_image* astc_img_from_floatx4_array( */ astcenc_image* astc_img_from_unorm8x4_array( const uint8_t* data, - unsigned int dim_x, - unsigned int dim_y, + size_t dim_x, + size_t dim_y, bool y_flip); /** @@ -310,7 +310,7 @@ astcenc_image* astc_img_from_unorm8x4_array( float* floatx4_array_from_astc_img( const astcenc_image* img, bool y_flip, - unsigned int z_index); + size_t z_index); /** * @brief Create a flattened RGBA UNORM8 data array from an image structure. diff --git a/Source/astcenccli_toplevel.cpp b/Source/astcenccli_toplevel.cpp index e71a6680..8dfcfda7 100644 --- a/Source/astcenccli_toplevel.cpp +++ b/Source/astcenccli_toplevel.cpp @@ -40,7 +40,7 @@ Data structure definitions ============================================================================ */ -typedef unsigned int astcenc_operation; +typedef size_t astcenc_operation; struct mode_entry { @@ -54,25 +54,25 @@ struct mode_entry ============================================================================ */ /** @brief Stage bit indicating we need to load a compressed image. */ -static const unsigned int ASTCENC_STAGE_LD_COMP = 1 << 0; +static const size_t ASTCENC_STAGE_LD_COMP = 1 << 0; /** @brief Stage bit indicating we need to store a compressed image. */ -static const unsigned int ASTCENC_STAGE_ST_COMP = 1 << 1; +static const size_t ASTCENC_STAGE_ST_COMP = 1 << 1; /** @brief Stage bit indicating we need to load an uncompressed image. */ -static const unsigned int ASTCENC_STAGE_LD_NCOMP = 1 << 2; +static const size_t ASTCENC_STAGE_LD_NCOMP = 1 << 2; /** @brief Stage bit indicating we need to store an uncompressed image. */ -static const unsigned int ASTCENC_STAGE_ST_NCOMP = 1 << 3; +static const size_t ASTCENC_STAGE_ST_NCOMP = 1 << 3; /** @brief Stage bit indicating we need compress an image. */ -static const unsigned int ASTCENC_STAGE_COMPRESS = 1 << 4; +static const size_t ASTCENC_STAGE_COMPRESS = 1 << 4; /** @brief Stage bit indicating we need to decompress an image. */ -static const unsigned int ASTCENC_STAGE_DECOMPRESS = 1 << 5; +static const size_t ASTCENC_STAGE_DECOMPRESS = 1 << 5; /** @brief Stage bit indicating we need to compare an image with the original input. */ -static const unsigned int ASTCENC_STAGE_COMPARE = 1 << 6; +static const size_t ASTCENC_STAGE_COMPARE = 1 << 6; /** @brief Operation indicating an unknown request (should never happen). 
*/ static const astcenc_operation ASTCENC_OP_UNKNOWN = 0; @@ -168,18 +168,18 @@ struct decompression_workload extern "C" void progress_emitter( float value ) { - const unsigned int bar_size = 25; - unsigned int parts = static_cast(value / 4.0f); + const size_t bar_size = 25; + size_t parts = static_cast(value / 4.0f); char buffer[bar_size + 3]; buffer[0] = '['; - for (unsigned int i = 0; i < parts; i++) + for (size_t i = 0; i < parts; i++) { buffer[i + 1] = '='; } - for (unsigned int i = parts; i < bar_size; i++) + for (size_t i = parts; i < bar_size; i++) { buffer[i + 1] = ' '; } @@ -293,7 +293,7 @@ static void decompression_workload_runner( */ static std::string get_slice_filename( const std::string& basename, - unsigned int index, + size_t index, bool& error ) { size_t sep = basename.find_last_of('.'); @@ -323,10 +323,10 @@ static std::string get_slice_filename( */ static astcenc_image* load_uncomp_file( const char* filename, - unsigned int dim_z, + size_t dim_z, bool y_flip, bool& is_hdr, - unsigned int& component_count + size_t& component_count ) { astcenc_image *image = nullptr; @@ -338,12 +338,12 @@ static astcenc_image* load_uncomp_file( else { bool slice_is_hdr; - unsigned int slice_component_count; + size_t slice_component_count; astcenc_image* slice = nullptr; std::vector slices; // For a 3D image load an array of slices - for (unsigned int image_index = 0; image_index < dim_z; image_index++) + for (size_t image_index = 0; image_index < dim_z; image_index++) { bool error; std::string slice_name = get_slice_filename(filename, image_index, error); @@ -396,15 +396,15 @@ static astcenc_image* load_uncomp_file( // If all slices loaded correctly then repack them into a single image if (slices.size() == dim_z) { - unsigned int dim_x = slices[0]->dim_x; - unsigned int dim_y = slices[0]->dim_y; + size_t dim_x = slices[0]->dim_x; + size_t dim_y = slices[0]->dim_y; int bitness = is_hdr ? 16 : 8; int slice_size = dim_x * dim_y; image = alloc_image(bitness, dim_x, dim_y, dim_z); // Combine 2D source images into one 3D image - for (unsigned int z = 0; z < dim_z; z++) + for (size_t z = 0; z < dim_z; z++) { if (image->data_type == ASTCENC_TYPE_U8) { @@ -503,9 +503,9 @@ static int init_astcenc_config( astcenc_preprocess& preprocess, astcenc_config& config ) { - unsigned int block_x = 0; - unsigned int block_y = 0; - unsigned int block_z = 1; + size_t block_x = 0; + size_t block_y = 0; + size_t block_z = 1; // For decode the block size is set by the incoming image. if (operation == ASTCENC_OP_DECOMPRESS) @@ -530,7 +530,7 @@ static int init_astcenc_config( } int cnt2D, cnt3D; - int dimensions = sscanf(argv[4], "%ux%u%nx%u%n", + int dimensions = sscanf(argv[4], "%zux%zu%nx%zu%n", &block_x, &block_y, &cnt2D, &block_z, &cnt3D); // Character after the last match should be a NUL if (!(((dimensions == 2) && !argv[4][cnt2D]) || ((dimensions == 3) && !argv[4][cnt3D]))) @@ -583,7 +583,7 @@ static int init_astcenc_config( argidx = 6; } - unsigned int flags = 0; + size_t flags = 0; // Gather the flags that we need while (argidx < argc) @@ -1152,7 +1152,7 @@ static int edit_astcenc_config( argidx++; // Read array size (image depth). 
- if (!sscanf(argv[argidx], "%u", &cli_config.array_size) || cli_config.array_size == 0) + if (!sscanf(argv[argidx], "%zu", &cli_config.array_size) || cli_config.array_size == 0) { print_error("ERROR: -zdim size '%s' is invalid\n", argv[argidx]); return 1; @@ -1243,36 +1243,36 @@ static void print_astcenc_config( if (config.block_z == 1) { - printf(" Block size: %ux%u\n", config.block_x, config.block_y); + printf(" Block size: %zux%zu\n", config.block_x, config.block_y); } else { - printf(" Block size: %ux%ux%u\n", config.block_x, config.block_y, config.block_z); + printf(" Block size: %zux%zux%zu\n", config.block_x, config.block_y, config.block_z); } printf(" Bitrate: %3.2f bpp\n", 128.0 / (config.block_x * config.block_y * config.block_z)); - printf(" RGB alpha scale weight: %d\n", (config.flags & ASTCENC_FLG_USE_ALPHA_WEIGHT)); + printf(" RGB alpha scale weight: %zu\n", (config.flags & ASTCENC_FLG_USE_ALPHA_WEIGHT)); if ((config.flags & ASTCENC_FLG_USE_ALPHA_WEIGHT)) { - printf(" Radius RGB alpha scale: %u texels\n", config.a_scale_radius); + printf(" Radius RGB alpha scale: %zu texels\n", config.a_scale_radius); } printf(" R component weight: %g\n", static_cast(config.cw_r_weight)); printf(" G component weight: %g\n", static_cast(config.cw_g_weight)); printf(" B component weight: %g\n", static_cast(config.cw_b_weight)); printf(" A component weight: %g\n", static_cast(config.cw_a_weight)); - printf(" Partition cutoff: %u partitions\n", config.tune_partition_count_limit); - printf(" 2 partition index cutoff: %u partition ids\n", config.tune_2partition_index_limit); - printf(" 3 partition index cutoff: %u partition ids\n", config.tune_3partition_index_limit); - printf(" 4 partition index cutoff: %u partition ids\n", config.tune_4partition_index_limit); + printf(" Partition cutoff: %zu partitions\n", config.tune_partition_count_limit); + printf(" 2 partition index cutoff: %zu partition ids\n", config.tune_2partition_index_limit); + printf(" 3 partition index cutoff: %zu partition ids\n", config.tune_3partition_index_limit); + printf(" 4 partition index cutoff: %zu partition ids\n", config.tune_4partition_index_limit); printf(" PSNR cutoff: %g dB\n", static_cast(config.tune_db_limit)); printf(" 3 partition cutoff: %g\n", static_cast(config.tune_2partition_early_out_limit_factor)); printf(" 4 partition cutoff: %g\n", static_cast(config.tune_3partition_early_out_limit_factor)); printf(" 2 plane correlation cutoff: %g\n", static_cast(config.tune_2plane_early_out_limit_correlation)); printf(" Block mode centile cutoff: %g%%\n", static_cast(config.tune_block_mode_limit)); - printf(" Candidate cutoff: %u candidates\n", config.tune_candidate_limit); - printf(" Refinement cutoff: %u iterations\n", config.tune_refinement_limit); - printf(" Compressor thread count: %d\n", cli_config.thread_count); + printf(" Candidate cutoff: %zu candidates\n", config.tune_candidate_limit); + printf(" Refinement cutoff: %zu iterations\n", config.tune_refinement_limit); + printf(" Compressor thread count: %zu\n", cli_config.thread_count); printf("\n"); } } @@ -1293,9 +1293,9 @@ static void print_astcenc_config( */ static vfloat4 image_get_pixel( const astcenc_image& img, - unsigned int x, - unsigned int y, - unsigned int z + size_t x, + size_t y, + size_t z ) { // We should never escape bounds assert(x < img.dim_x); @@ -1351,9 +1351,9 @@ static vfloat4 image_get_pixel( */ static void image_set_pixel( astcenc_image& img, - unsigned int x, - unsigned int y, - unsigned int z, + size_t x, + size_t y, + size_t z, vfloat4 
pixel ) { // We should never escape bounds @@ -1408,11 +1408,11 @@ static void image_preprocess_normalize( const astcenc_image& input, astcenc_image& output ) { - for (unsigned int z = 0; z < input.dim_z; z++) + for (size_t z = 0; z < input.dim_z; z++) { - for (unsigned int y = 0; y < input.dim_y; y++) + for (size_t y = 0; y < input.dim_y; y++) { - for (unsigned int x = 0; x < input.dim_x; x++) + for (size_t x = 0; x < input.dim_x; x++) { vfloat4 pixel = image_get_pixel(input, x, y, z); @@ -1487,11 +1487,11 @@ static void image_preprocess_premultiply( astcenc_image& output, astcenc_profile profile ) { - for (unsigned int z = 0; z < input.dim_z; z++) + for (size_t z = 0; z < input.dim_z; z++) { - for (unsigned int y = 0; y < input.dim_y; y++) + for (size_t y = 0; y < input.dim_y; y++) { - for (unsigned int x = 0; x < input.dim_x; x++) + for (size_t x = 0; x < input.dim_x; x++) { vfloat4 pixel = image_get_pixel(input, x, y, z); @@ -2011,7 +2011,7 @@ int astcenc_main( } astcenc_image* image_uncomp_in = nullptr ; - unsigned int image_uncomp_in_component_count = 0; + size_t image_uncomp_in_component_count = 0; bool image_uncomp_in_is_hdr = false; astcenc_image* image_decomp_out = nullptr; @@ -2126,15 +2126,15 @@ int astcenc_main( printf(" Color profile: %s\n", image_uncomp_in_is_hdr ? "HDR" : "LDR"); if (image_uncomp_in->dim_z > 1) { - printf(" Dimensions: 3D, %ux%ux%u\n", + printf(" Dimensions: 3D, %zux%zux%zu\n", image_uncomp_in->dim_x, image_uncomp_in->dim_y, image_uncomp_in->dim_z); } else { - printf(" Dimensions: 2D, %ux%u\n", + printf(" Dimensions: 2D, %zux%zu\n", image_uncomp_in->dim_x, image_uncomp_in->dim_y); } - printf(" Components: %d\n\n", image_uncomp_in_component_count); + printf(" Components: %zu\n\n", image_uncomp_in_component_count); } } @@ -2159,9 +2159,9 @@ int astcenc_main( { print_astcenc_config(cli_config, config); - unsigned int blocks_x = (image_uncomp_in->dim_x + config.block_x - 1) / config.block_x; - unsigned int blocks_y = (image_uncomp_in->dim_y + config.block_y - 1) / config.block_y; - unsigned int blocks_z = (image_uncomp_in->dim_z + config.block_z - 1) / config.block_z; + size_t blocks_x = (image_uncomp_in->dim_x + config.block_x - 1) / config.block_x; + size_t blocks_y = (image_uncomp_in->dim_y + config.block_y - 1) / config.block_y; + size_t blocks_z = (image_uncomp_in->dim_z + config.block_z - 1) / config.block_z; size_t buffer_size = blocks_x * blocks_y * blocks_z * 16; uint8_t* buffer = new uint8_t[buffer_size]; @@ -2176,7 +2176,7 @@ int astcenc_main( // Only launch worker threads for multi-threaded use - it makes basic // single-threaded profiling and debugging a little less convoluted double start_compression_time = get_time(); - for (unsigned int i = 0; i < cli_config.repeat_count; i++) + for (size_t i = 0; i < cli_config.repeat_count; i++) { if (config.progress_callback) { @@ -2244,7 +2244,7 @@ int astcenc_main( // Only launch worker threads for multi-threaded use - it makes basic // single-threaded profiling and debugging a little less convoluted double start_decompression_time = get_time(); - for (unsigned int i = 0; i < cli_config.repeat_count; i++) + for (size_t i = 0; i < cli_config.repeat_count; i++) { double start_iter_time = get_time(); if (cli_config.thread_count > 1) diff --git a/Source/astcenccli_toplevel_help.cpp b/Source/astcenccli_toplevel_help.cpp index f475b39c..56bbca1c 100644 --- a/Source/astcenccli_toplevel_help.cpp +++ b/Source/astcenccli_toplevel_help.cpp @@ -582,7 +582,7 @@ void astcenc_print_header() const char* f16ctype = ""; 
 #endif
 
-	unsigned int bits = static_cast<unsigned int>(sizeof(void*) * 8);
+	size_t bits = static_cast<size_t>(sizeof(void*) * 8);
 	printf(astcenc_copyright_string, VERSION_STRING, bits, simdtype, pcnttype, f16ctype, YEAR_STRING);
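(Aside, not part of the patch: an illustrative sketch of the matching caller-side conventions after the size_t migration. The block sizes and the parsed string are invented for the example; astcenc_config is the public structure whose fields change type in this diff.)

	// size_t struct fields are now printed with %zu rather than %u ...
	astcenc_config config {};
	config.block_x = 6;
	config.block_y = 6;
	config.block_z = 1;
	printf("Block size: %zux%zux%zu\n", config.block_x, config.block_y, config.block_z);

	// ... and parsed with %zu as well
	size_t array_size = 0;
	if (sscanf("4", "%zu", &array_size) == 1)
	{
		printf("Array size: %zu slices\n", array_size);
	}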