Skip to content

Commit 033f75c

Browse files
committed
Remove AVX512 usage and appease the CI gods
1 parent 0b32a09 commit 033f75c

File tree

2 files changed

+22
-16
lines changed

2 files changed

+22
-16
lines changed

src/gui/widgets/memory_observer.cc

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -292,10 +292,10 @@ int PCSX::Widgets::MemoryObserver::getMemValue(uint32_t absoluteAddress, const u
292292
// Check if all bytes in a 256-bit vector are equal
293293
// Broadcasts byte 0 of the vector to 256 bits, then xors the result with the starting vector
294294
// If the resulting vector is 0, then all bytes in the 256-bit vector are equal
295-
bool PCSX::Widgets::MemoryObserver::all_equal(__m256i vec) {
295+
AVX2_FUNC bool PCSX::Widgets::MemoryObserver::all_equal(__m256i vec) {
296296
const __m128i vec128 = _mm256_castsi256_si128(vec);
297297
const __m256i broadcasted = _mm256_broadcastb_epi8(vec128);
298-
const __m256i res = _mm256_xor_epi32(vec, broadcasted);
298+
const __m256i res = _mm256_xor_si256(vec, broadcasted);
299299

300300
// Check if the vector after xoring is 0
301301
return _mm256_testz_si256(res, res) != 0;

src/gui/widgets/memory_observer.h

Lines changed: 20 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -21,12 +21,18 @@
2121

2222
#include <array>
2323
#include <cstdint>
24+
#include <cstring>
2425
#include <stdexcept>
2526
#include <vector>
2627

2728
#include "imgui.h"
2829
#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64) || defined(_M_AMD64)
2930
#define MEMORY_OBSERVER_X86 // Do not include immintrin/xbyak or use avx intrinsics unless we're compiling for x86
31+
#ifdef __GNUC__
32+
#define AVX2_FUNC [[gnu::target("avx2")]]
33+
#else
34+
#define AVX2_FUNC
35+
#endif
3036
#include "immintrin.h"
3137
#endif
3238

@@ -79,14 +85,14 @@ class MemoryObserver {
7985

8086
#ifdef MEMORY_OBSERVER_X86
8187
template <int bufferSize>
82-
static __m256i avx2_getShuffleResultsFor(const std::array<uint8_t, bufferSize>& buffer,
88+
AVX2_FUNC static __m256i avx2_getShuffleResultsFor(const std::array<uint8_t, bufferSize>& buffer,
8389
std::array<uint8_t, 32>& extendedBuffer, int mask) {
8490
static_assert(bufferSize == 8 || bufferSize == 16);
8591

8692
for (auto j = 0u; j < (32 / bufferSize); ++j) {
87-
std::ranges::copy(buffer, extendedBuffer.begin() + j * bufferSize);
93+
std::memcpy(&extendedBuffer[j * bufferSize], &buffer[0], bufferSize);
8894
}
89-
const auto copies = _mm256_loadu_epi8(extendedBuffer.data());
95+
const auto copies = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(extendedBuffer.data()));
9096

9197
switch (mask) {
9298
case 0: {
@@ -122,40 +128,40 @@ class MemoryObserver {
122128
}
123129

124130
template <int bufferSize>
125-
void simd_populateAddressList(const uint8_t* memData, uint32_t memBase, uint32_t memSize) {
131+
AVX2_FUNC void simd_populateAddressList(const uint8_t* memData, uint32_t memBase, uint32_t memSize) {
126132
static_assert(bufferSize == 8 || bufferSize == 16);
127133

128134
alignas(32) auto buffer = std::array<uint8_t, bufferSize>{};
129135
alignas(32) auto extendedBuffer = std::array<uint8_t, 32>{};
130136

131137
const auto sequenceSize = m_sequenceSize;
132138
std::copy_n(m_sequence, sequenceSize, buffer.data());
133-
auto patternShuffleResults = std::vector<__m256i>{avx2_getShuffleResultsFor(buffer, extendedBuffer, 0),
134-
avx2_getShuffleResultsFor(buffer, extendedBuffer, 1)};
139+
auto patternShuffleResults = std::vector<__m256i>{avx2_getShuffleResultsFor<bufferSize>(buffer, extendedBuffer, 0),
140+
avx2_getShuffleResultsFor<bufferSize>(buffer, extendedBuffer, 1)};
135141
if constexpr (bufferSize == 16) {
136-
patternShuffleResults.push_back(avx2_getShuffleResultsFor(buffer, extendedBuffer, 2));
137-
patternShuffleResults.push_back(avx2_getShuffleResultsFor(buffer, extendedBuffer, 3));
142+
patternShuffleResults.push_back(avx2_getShuffleResultsFor<bufferSize>(buffer, extendedBuffer, 2));
143+
patternShuffleResults.push_back(avx2_getShuffleResultsFor<bufferSize>(buffer, extendedBuffer, 3));
138144
}
139145

140146
m_addresses.clear();
141147
for (auto i = 0u; i + sequenceSize < memSize; i += m_step) {
142148
std::copy_n(memData + i, sequenceSize, buffer.data());
143149

144-
bool bAllEqual = true;
150+
bool allEqual = true;
145151
for (auto j = 0u; j < patternShuffleResults.size(); ++j) {
146-
bAllEqual = all_equal(
147-
_mm256_cmpeq_epi8(patternShuffleResults[j], avx2_getShuffleResultsFor(buffer, extendedBuffer, j)));
148-
if (!bAllEqual) {
152+
allEqual = all_equal(
153+
_mm256_cmpeq_epi8(patternShuffleResults[j], avx2_getShuffleResultsFor<bufferSize>(buffer, extendedBuffer, j)));
154+
if (!allEqual) {
149155
break;
150156
}
151157
}
152158

153-
if (bAllEqual) {
159+
if (allEqual) {
154160
m_addresses.push_back(memBase + i);
155161
}
156162
}
157163
}
158-
static bool all_equal(__m256i input);
164+
AVX2_FUNC static bool all_equal(__m256i input);
159165
#else
160166
template <int bufferSize>
161167
void simd_populateAddressList(const uint8_t* memData, uint32_t memBase, uint32_t memSize) {

0 commit comments

Comments
 (0)