Skip to content

Commit 134f2e3

Browse files
committed
Removed unnecessary SSE4.1 instructions
Only SSE2 instructions are used now (in fact, the replacement code needs only SSE)
1 parent f9ec2f8 commit 134f2e3

File tree

4 files changed

+5
-14
lines changed

4 files changed

+5
-14
lines changed

configure

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,7 @@ options:
2929
3030
--target=TARGET set target instruction set
3131
- sse2
32-
- sse4.1
33-
[sse4.1]
32+
[sse2]
3433
3534
EOF
3635
exit 1
@@ -96,7 +95,7 @@ CXXFLAGS="-std=c++11 -Wall -Wno-unused-local-typedefs -I. -I$SRCDIR/include -DND
9695
LDFLAGS=""
9796
SOFLAGS="-shared"
9897
DEPLIBS="fftw3f"
99-
TARGET="-msse4.1"
98+
TARGET="-msse2"
10099

101100
# -- options ----------------------------------------------------------------
102101
echo all command lines: > config.log

msvc/Includes.props

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
<ItemDefinitionGroup>
77
<ClCompile>
88
<AdditionalIncludeDirectories>..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
9-
<PreprocessorDefinitions>__SSE2__;__SSE4_1__;%(PreprocessorDefinitions)</PreprocessorDefinitions>
9+
<PreprocessorDefinitions>__SSE2__;%(PreprocessorDefinitions)</PreprocessorDefinitions>
1010
</ClCompile>
1111
</ItemDefinitionGroup>
1212
<ItemGroup />

source/BM3D_Basic.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -101,11 +101,7 @@ void BM3D_Basic_Process::CollaborativeFilter(int plane,
101101
const __m128 cmp2 = _mm_cmplt_ps(s1, t1n);
102102
const __m128 cmp = _mm_or_ps(cmp1, cmp2);
103103

104-
#if defined(__SSE4_1__)
105-
const __m128 d1 = _mm_blendv_ps(zero_ps, s1, cmp);
106-
#else
107-
const __m128 d1 = _mm_or_ps(_mm_and_ps(cmp, s1), _mm_andnot_ps(cmp, zero_ps));
108-
#endif
104+
const __m128 d1 = _mm_and_ps(cmp, s1);
109105
_mm_store_ps(srcp, d1);
110106
cmp_sum = _mm_sub_epi32(cmp_sum, _mm_castps_si128(cmp));
111107
}

source/VBM3D_Basic.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -101,11 +101,7 @@ void VBM3D_Basic_Process::CollaborativeFilter(int plane,
101101
const __m128 cmp2 = _mm_cmplt_ps(s1, t1n);
102102
const __m128 cmp = _mm_or_ps(cmp1, cmp2);
103103

104-
#if defined(__SSE4_1__)
105-
const __m128 d1 = _mm_blendv_ps(zero_ps, s1, cmp);
106-
#else
107-
const __m128 d1 = _mm_or_ps(_mm_and_ps(cmp, s1), _mm_andnot_ps(cmp, zero_ps));
108-
#endif
104+
const __m128 d1 = _mm_and_ps(cmp, s1);
109105
_mm_store_ps(srcp, d1);
110106
cmp_sum = _mm_sub_epi32(cmp_sum, _mm_castps_si128(cmp));
111107
}

0 commit comments

Comments
 (0)