Skip to content

Add big-endian support to the reference SIMD implementation #561

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ option(ASTCENC_ISA_NONE "Enable astcenc builds for no SIMD")
option(ASTCENC_ISA_NATIVE "Enable astcenc builds for native SIMD")
option(ASTCENC_DECOMPRESSOR "Enable astcenc builds for decompression only")
option(ASTCENC_SHAREDLIB "Enable astcenc builds with core library shared objects")
option(ASTCENC_BIG_ENDIAN "Enable astcenc big-endian support")
option(ASTCENC_DIAGNOSTICS "Enable astcenc builds with diagnostic trace")
option(ASTCENC_ASAN "Enable astcenc builds with address sanitizer")
option(ASTCENC_UBSAN "Enable astcenc builds with undefined behavior sanitizer")
Expand Down Expand Up @@ -137,6 +138,9 @@ if("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin")
printopt("Universal bin " ${ASTCENC_UNIVERSAL_BUILD})
endif()
printopt("Invariance " ${ASTCENC_INVARIANCE})
# Report the big-endian option only when it has been enabled.
if(${ASTCENC_BIG_ENDIAN})
printopt("Big endian " ${ASTCENC_BIG_ENDIAN})
endif()
printopt("Shared libs " ${ASTCENC_SHAREDLIB})
printopt("Decompressor " ${ASTCENC_DECOMPRESSOR})
message(STATUS "Developer options")
Expand Down
32 changes: 22 additions & 10 deletions Docs/Building-BE.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ distributions still support BE platforms.
Even though Arm64 can run in a BE mode, it's now very rare in practice. It's no
longer supported out of the box in the latest Arm upstream compiler releases,
and getting hold of a sysroot is increasingly difficult. To test BE builds, I
therefore cross-compile Linux builds for MIPS64 and use `qemu-user` to run
therefore cross-compile Linux builds for PPC64 and use `qemu-user` to run
them. This doesn't use a real sysroot, and so everything must be compiled with
`-static` linkage.

Expand All @@ -19,15 +19,16 @@ Install the following host software:

```bash
# Compiler
sudo apt-get install g++-mips64-linux-gnuabi64
sudo apt-get install g++-powerpc64-linux-gnu

# Multi-arch libraries
sudo apt-get install g++-multilib-mips64-linux-gnuabi64
sudo apt-get install g++-multilib-powerpc64-linux-gnu

# QEMU
sudo apt-get install qemu-user-static
sudo apt-get install qemu-user-static qemu-user-binfmt binfmt-support
sudo mkdir /etc/qemu-binfmt
sudo ln -s /usr/mips64-linux-gnuabi64 /etc/qemu-binfmt/mips64
sudo ln -s /usr/powerpc64-linux-gnu /etc/qemu-binfmt/ppc64
sudo update-binfmts --import qemu-ppc64
```

## CMake toolchain file
Expand All @@ -41,19 +42,21 @@ file in the root of the project, with the following content:
set(CMAKE_SYSTEM_NAME Linux)

# Cross-compilers for C and C++
set(CMAKE_C_COMPILER mips64-linux-gnuabi64-gcc)
set(CMAKE_CXX_COMPILER mips64-linux-gnuabi64-g++)
set(CMAKE_C_COMPILER powerpc64-linux-gnu-gcc)
set(CMAKE_CXX_COMPILER powerpc64-linux-gnu-g++)

# Compiler environment
set(CMAKE_FIND_ROOT_PATH /usr/mips64-linux-gnuabi64)
set(CMAKE_FIND_ROOT_PATH /usr/powerpc64-linux-gnu)

# Default compiler and linker flags to use
set(CMAKE_C_FLAGS_INIT -static)
set(CMAKE_CXX_FLAGS_INIT -static)
set(CMAKE_EXE_LINKER_FLAGS_INIT -static)
set(CMAKE_SHARED_LINKER_FLAGS_INIT -static)
set(CMAKE_MODULE_LINKER_FLAGS_INIT -static)

# Build options
set(ASTCENC_BIG_ENDIAN ON)

# Never match host tools
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)

Expand All @@ -78,8 +81,17 @@ cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../ -DASTCENC_ISA_NONE=O
The cross-compiled `astcenc` binary runs as normal, and can access host files,
but must run through QEMU to do the instruction set translation.

If the binfmt setup performed earlier was successful, you can run the
binary directly, as if it were native:

```
./bin/astcenc-none ...
```

... but otherwise you can run it manually using QEMU as a wrapper:

```
qemu-mips64 ./bin/astcenc-none ...
qemu-ppc64 ./bin/astcenc-none ...
```

- - -
Expand Down
6 changes: 6 additions & 0 deletions Source/UnitTest/cmake_core.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,12 @@ if(${ASTCENC_ISA_SIMD} MATCHES "none")
ASTCENC_POPCNT=0
ASTCENC_F16C=0)

# Compile the unit test target with ASTCENC_BIG_ENDIAN=1 when the
# big-endian option is enabled; the definition is private to this target.
if(${ASTCENC_BIG_ENDIAN})
    target_compile_definitions(${ASTCENC_TEST} PRIVATE ASTCENC_BIG_ENDIAN=1)
endif()

elseif(${ASTCENC_ISA_SIMD} MATCHES "neon")
target_compile_definitions(${ASTCENC_TEST}
PRIVATE
Expand Down
7 changes: 7 additions & 0 deletions Source/UnitTest/test_simd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2015,10 +2015,17 @@ TEST(vint4, interleave_rgba8)

vint4 result = interleave_rgba8(r, g, b, a);

#if !defined(ASTCENC_BIG_ENDIAN)
EXPECT_EQ(result.lane<0>(), 0x04030201);
EXPECT_EQ(result.lane<1>(), 0x14131211);
EXPECT_EQ(result.lane<2>(), 0x24232221);
EXPECT_EQ(result.lane<3>(), 0x34333231);
#else
EXPECT_EQ(result.lane<0>(), 0x01020304);
EXPECT_EQ(result.lane<1>(), 0x11121314);
EXPECT_EQ(result.lane<2>(), 0x21222324);
EXPECT_EQ(result.lane<3>(), 0x31323334);
#endif
}

# if ASTCENC_SIMD_WIDTH == 8
Expand Down
8 changes: 8 additions & 0 deletions Source/astcenc_vecmathlib_none_4.h
Original file line number Diff line number Diff line change
Expand Up @@ -694,7 +694,11 @@ ASTCENC_SIMD_INLINE void pack_and_store_low_bytes(vint4 a, uint8_t* p)
int b2 = a.m[2] & 0xFF;
int b3 = a.m[3] & 0xFF;

#if !defined(ASTCENC_BIG_ENDIAN)
int b = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
#else
int b = b3 | (b2 << 8) | (b1 << 16) | (b0 << 24);
#endif
a = vint4(b, 0, 0, 0);
store_nbytes(a, p);
}
Expand Down Expand Up @@ -1171,7 +1175,11 @@ ASTCENC_SIMD_INLINE vint4 vtable_lookup_32bit(
*/
ASTCENC_SIMD_INLINE vint4 interleave_rgba8(vint4 r, vint4 g, vint4 b, vint4 a)
{
#if defined(ASTCENC_BIG_ENDIAN)
	// Big-endian build: a is left unshifted and r is shifted up by 24 bits,
	// i.e. the channel order within each lane is reversed.
	vint4 packed = a + lsl<8>(b) + lsl<16>(g) + lsl<24>(r);
#else
	// Default build: r is left unshifted and a is shifted up by 24 bits.
	vint4 packed = r + lsl<8>(g) + lsl<16>(b) + lsl<24>(a);
#endif

	return packed;
}

/**
Expand Down
6 changes: 6 additions & 0 deletions Source/cmake_core.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,12 @@ macro(astcenc_set_properties ASTCENC_TARGET_NAME ASTCENC_VENEER_TYPE)
ASTCENC_POPCNT=0
ASTCENC_F16C=0)

# Compile the target with ASTCENC_BIG_ENDIAN=1 when the big-endian option
# is enabled; the definition is private to the target being configured.
if(${ASTCENC_BIG_ENDIAN})
    target_compile_definitions(${ASTCENC_TARGET_NAME} PRIVATE ASTCENC_BIG_ENDIAN=1)
endif()

elseif(${ASTCENC_ISA_SIMD} MATCHES "neon")
target_compile_definitions(${ASTCENC_TARGET_NAME}
PRIVATE
Expand Down