Skip to content

Add MSVC preprocessor ISA detection #568

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions Source/astcenc_averages_and_directions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ static void compute_partition_averages_rgb(
{
vint texel_partition(pi.partition_of_texel + i);

vmask lane_mask = lane_id < vint(texel_count);
vmask lane_mask = lane_id < vint_from_size(texel_count);
lane_id += vint(ASTCENC_SIMD_WIDTH);

vmask p0_mask = lane_mask & (texel_partition == vint(0));
Expand Down Expand Up @@ -104,7 +104,7 @@ static void compute_partition_averages_rgb(
{
vint texel_partition(pi.partition_of_texel + i);

vmask lane_mask = lane_id < vint(texel_count);
vmask lane_mask = lane_id < vint_from_size(texel_count);
lane_id += vint(ASTCENC_SIMD_WIDTH);

vmask p0_mask = lane_mask & (texel_partition == vint(0));
Expand Down Expand Up @@ -149,7 +149,7 @@ static void compute_partition_averages_rgb(
{
vint texel_partition(pi.partition_of_texel + i);

vmask lane_mask = lane_id < vint(texel_count);
vmask lane_mask = lane_id < vint_from_size(texel_count);
lane_id += vint(ASTCENC_SIMD_WIDTH);

vmask p0_mask = lane_mask & (texel_partition == vint(0));
Expand Down Expand Up @@ -239,7 +239,7 @@ static void compute_partition_averages_rgba(
{
vint texel_partition(pi.partition_of_texel + i);

vmask lane_mask = lane_id < vint(texel_count);
vmask lane_mask = lane_id < vint_from_size(texel_count);
lane_id += vint(ASTCENC_SIMD_WIDTH);

vmask p0_mask = lane_mask & (texel_partition == vint(0));
Expand Down Expand Up @@ -279,7 +279,7 @@ static void compute_partition_averages_rgba(
{
vint texel_partition(pi.partition_of_texel + i);

vmask lane_mask = lane_id < vint(texel_count);
vmask lane_mask = lane_id < vint_from_size(texel_count);
lane_id += vint(ASTCENC_SIMD_WIDTH);

vmask p0_mask = lane_mask & (texel_partition == vint(0));
Expand Down Expand Up @@ -330,7 +330,7 @@ static void compute_partition_averages_rgba(
{
vint texel_partition(pi.partition_of_texel + i);

vmask lane_mask = lane_id < vint(texel_count);
vmask lane_mask = lane_id < vint_from_size(texel_count);
lane_id += vint(ASTCENC_SIMD_WIDTH);

vmask p0_mask = lane_mask & (texel_partition == vint(0));
Expand Down Expand Up @@ -777,7 +777,7 @@ void compute_error_squared_rgba(
vint lane_ids = vint::lane_id();
for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH)
{
vmask mask = lane_ids < vint(texel_count);
vmask mask = lane_ids < vint_from_size(texel_count);
const uint8_t* texel_idxs = texel_indexes + i;

vfloat data_r = gatherf_byte_inds<vfloat>(blk.data_r, texel_idxs);
Expand Down Expand Up @@ -891,7 +891,7 @@ void compute_error_squared_rgb(
vint lane_ids = vint::lane_id();
for (size_t i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH)
{
vmask mask = lane_ids < vint(texel_count);
vmask mask = lane_ids < vint_from_size(texel_count);
const uint8_t* texel_idxs = texel_indexes + i;

vfloat data_r = gatherf_byte_inds<vfloat>(blk.data_r, texel_idxs);
Expand Down
6 changes: 3 additions & 3 deletions Source/astcenc_mathlib.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2011-2024 Arm Limited
// Copyright 2011-2025 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
Expand Down Expand Up @@ -48,7 +48,7 @@
#define ASTCENC_SSE 42
#elif defined(__SSE4_1__)
#define ASTCENC_SSE 41
#elif defined(__SSE2__)
#elif defined(__SSE2__) || (defined(_M_AMD64) && !defined(_M_ARM64EC))
#define ASTCENC_SSE 20
#else
#define ASTCENC_SSE 0
Expand All @@ -68,7 +68,7 @@
#endif

#ifndef ASTCENC_NEON
#if defined(__aarch64__)
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
#define ASTCENC_NEON 1
#else
#define ASTCENC_NEON 0
Expand Down
11 changes: 7 additions & 4 deletions Source/astcenc_pick_best_endpoint_format.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2011-2024 Arm Limited
// Copyright 2011-2025 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
Expand Down Expand Up @@ -1292,9 +1292,12 @@ unsigned int compute_ideal_endpoint_formats(
vint vbest_error_index(-1);
vfloat vbest_ep_error(ERROR_CALC_DEFAULT);

start_block_mode = round_down_to_simd_multiple_vla(start_block_mode);
vint lane_ids = vint::lane_id() + vint(start_block_mode);
for (unsigned int j = start_block_mode; j < end_block_mode; j += ASTCENC_SIMD_WIDTH)
// TODO: This should use size_t for the inputs of start/end_block_mode
// to avoid some of this type conversion, but that propagates and will
// need a bigger PR to fix
size_t start_mode = round_down_to_simd_multiple_vla(start_block_mode);
vint lane_ids = vint::lane_id() + vint_from_size(start_mode);
for (size_t j = start_mode; j < end_block_mode; j += ASTCENC_SIMD_WIDTH)
{
vfloat err = vfloat(errors_of_best_combination + j);
vmask mask = err < vbest_ep_error;
Expand Down
5 changes: 5 additions & 0 deletions Source/astcenc_vecmathlib.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ template<typename T> T gatherf_byte_inds(const float* base, const uint8_t* indic

constexpr auto loada = vfloat8::loada;
constexpr auto load1 = vfloat8::load1;
constexpr auto vint_from_size = vint8_from_size;

#elif ASTCENC_SSE >= 20
// If we have SSE expose 4-wide VLA, and 4-wide fixed width.
Expand All @@ -123,6 +124,7 @@ template<typename T> T gatherf_byte_inds(const float* base, const uint8_t* indic

constexpr auto loada = vfloat4::loada;
constexpr auto load1 = vfloat4::load1;
constexpr auto vint_from_size = vint4_from_size;

#elif ASTCENC_SVE == 8
// Check the compiler is configured with fixed-length 256-bit SVE.
Expand Down Expand Up @@ -154,6 +156,7 @@ template<typename T> T gatherf_byte_inds(const float* base, const uint8_t* indic

constexpr auto loada = vfloat8::loada;
constexpr auto load1 = vfloat8::load1;
constexpr auto vint_from_size = vint8_from_size;

#elif ASTCENC_NEON > 0
// If we have NEON expose 4-wide VLA.
Expand All @@ -173,6 +176,7 @@ template<typename T> T gatherf_byte_inds(const float* base, const uint8_t* indic

constexpr auto loada = vfloat4::loada;
constexpr auto load1 = vfloat4::load1;
constexpr auto vint_from_size = vint4_from_size;

#else
// If we have nothing expose 4-wide VLA, and 4-wide fixed width.
Expand Down Expand Up @@ -209,6 +213,7 @@ template<typename T> T gatherf_byte_inds(const float* base, const uint8_t* indic

constexpr auto loada = vfloat4::loada;
constexpr auto load1 = vfloat4::load1;
constexpr auto vint_from_size = vint4_from_size;
#endif

/**
Expand Down
9 changes: 9 additions & 0 deletions Source/astcenc_vecmathlib_avx2_8.h
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,15 @@ ASTCENC_SIMD_INLINE int hmax_s(vint8 a)
return _mm256_cvtsi256_si32(hmax(a).m);
}

/**
 * @brief Construct a vint8 from a size_t value.
 *
 * The value is narrowed to an int and handed to the vint8 constructor. Debug
 * builds assert that the value fits in an int before narrowing.
 */
ASTCENC_SIMD_INLINE vint8 vint8_from_size(size_t a)
{
	// Compare in the unsigned domain so the range check has no implicit conversion
	assert(a <= static_cast<size_t>(std::numeric_limits<int>::max()));

	const int narrowed = static_cast<int>(a);
	return vint8(narrowed);
}

/**
* @brief Store a vector to a 16B aligned memory address.
*/
Expand Down
10 changes: 10 additions & 0 deletions Source/astcenc_vecmathlib_common_4.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#endif

#include <cstdio>
#include <limits>

// ============================================================================
// vint4 operators and functions
Expand Down Expand Up @@ -117,6 +118,15 @@ ASTCENC_SIMD_INLINE int hmin_s(vint4 a)
return hmin(a).lane<0>();
}

/**
 * @brief Construct a vint4 from a size_t value.
 *
 * The value is narrowed to an int and handed to the vint4 constructor. Debug
 * builds assert that the value fits in an int before narrowing.
 */
ASTCENC_SIMD_INLINE vint4 vint4_from_size(size_t a)
{
	// Compare in the unsigned domain so the range check has no implicit conversion
	assert(a <= static_cast<size_t>(std::numeric_limits<int>::max()));

	const int narrowed = static_cast<int>(a);
	return vint4(narrowed);
}

/**
* @brief Return the horizontal maximum of a vector.
*/
Expand Down
9 changes: 9 additions & 0 deletions Source/astcenc_vecmathlib_sve_8.h
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,15 @@ ASTCENC_SIMD_INLINE int hmax_s(vint8 a)
return svmaxv_s32(svptrue_b32(), a.m);
}

/**
 * @brief Construct a vint8 from a size_t value.
 *
 * The value is narrowed to an int and handed to the vint8 constructor. Debug
 * builds assert that the value fits in an int before narrowing.
 */
ASTCENC_SIMD_INLINE vint8 vint8_from_size(size_t a)
{
	// Compare in the unsigned domain so the range check has no implicit conversion
	assert(a <= static_cast<size_t>(std::numeric_limits<int>::max()));

	const int narrowed = static_cast<int>(a);
	return vint8(narrowed);
}

/**
* @brief Store a vector to a 16B aligned memory address.
*/
Expand Down
1 change: 1 addition & 0 deletions Source/cmake_core.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ macro(astcenc_set_properties ASTCENC_TARGET_NAME ASTCENC_VENEER_TYPE)

# MSVC compiler defines
$<${is_msvc_fe}:/EHsc>
$<${is_msvc_fe}:/WX>
$<${is_msvccl}:/wd4324>

# G++ and Clang++ compiler defines
Expand Down