Skip to content

Commit 3398cf7

Browse files
committed
Use acepck and blsmsk for flip_avx512cd
1 parent 619a302 commit 3398cf7

File tree

1 file changed

+20
-20
lines changed

1 file changed

+20
-20
lines changed

src/flip_avx512cd.c

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -128,43 +128,43 @@ __m128i vectorcall mm_Flip(const __m128i OP, int pos)
128128

129129
// right: look for player (or edge) bit with lzcnt
130130
__m256i rM = lrmask[pos].v4[0];
131-
#if 1 // use prove
131+
#if 1 // use mask by acepck
132+
__m256i rP = _mm256_and_si256(PP, rM);
133+
// shadow mask lower than leftmost P
134+
__m256i t0 = _mm256_srlv_epi64(minusone, _mm256_lzcnt_epi64(rP));
135+
// apply flip if leftmost non-opponent is P
136+
// __m256i rE = _mm256_andnot_si256(_mm256_or_si256(OO, PP), rM);
137+
__m256i rE = _mm256_ternarylogic_epi64(OO, PP, rM, 0x02); // masked empty
138+
// __m256i FF = _mm256_maskz_andnot_epi64(_mm256_cmpgt_epi64_mask(rP, rE), t0, rM);
139+
__m256i FF = _mm256_ternarylogic_epi64(_mm256_cmpgt_epi64(rP, rE), t0, rM, 0x20);
140+
141+
#else // use prove
132142
__m256i msb = _mm256_slli_epi64(minusone, 63);
133143
// outflank P, or 0 if leftmost non-opponent is empty
134144
__m256i t0 = _mm256_and_si256(_mm256_srlv_epi64(msb, _mm256_lzcnt_epi64(_mm256_andnot_si256(OO, rM))), PP);
135145
// clear masked OO lower than outflank
136146
// __m256i FF = _mm256_andnot_si256(_mm256_add_epi64(t0, minusone), _mm256_and_si256(OO, rM));
137147
__m256i FF = _mm256_ternarylogic_epi64(_mm256_add_epi64(t0, minusone), OO, rM, 0x08);
138-
139-
#else // use mask by acepck
140-
__m256i rP = _mm256_and_si256(PP, rM);
141-
// shadow mask lower than leftmost P
142-
__m256i t0 = _mm256_srlv_epi64(minusone, _mm256_lzcnt_epi64(rP));
143-
// apply flip if leftmost non-opponent is P
144-
// __m256i rE = _mm256_andnot_si256(OO, _mm256_andnot_si256(rP, rM));
145-
__m256i rE = _mm256_ternarylogic_epi64(OO, rM, rP, 0x04); // masked empty
146-
// __m256i FF = _mm256_and_si256(_mm256_cmpgt_epi64(rP, rE), _mm256_andnot_epi64(t0, rM));
147-
__m256i FF = _mm256_ternarylogic_epi64(_mm256_cmpgt_epi64(rP, rE), t0, rM, 0x20);
148148
#endif
149149
// left: look for non-opponent LS1B
150150
__m256i lM = lrmask[pos].v4[1];
151151
__m256i lO = _mm256_andnot_si256(OO, lM);
152-
#if 1 // LS1B
152+
#if 1 // BLSMSK
153+
// __m256i t2 = _mm256_xor_si256(_mm256_add_epi64(lO, minusone), lO); // BLSMSK
154+
// t2 = _mm256_and_si256(lM, t2); // non-opponent LS1B and opponent in between
155+
__m256i t2 = _mm256_ternarylogic_epi64(lM, _mm256_add_epi64(lO, minusone), lO, 0x60);
156+
// apply flip if P is in mask, i.e. LS1B is P
157+
// FF = _mm256_mask_or_epi64(FF, _mm256_test_epi64_mask(PP, t2), FF, _mm256_andnot_si256(PP, t2));
158+
FF = _mm256_mask_ternarylogic_epi64(FF, _mm256_test_epi64_mask(PP, t2), PP, t2, 0xf2);
159+
160+
#else // LS1B
153161
// lO = _mm256_and_si256(lO, _mm256_sub_epi64(_mm256_setzero_si256(), lO)); // LS1B
154162
// lO = _mm256_and_si256(lO, PP);
155163
lO = _mm256_ternarylogic_epi64(lO, _mm256_sub_epi64(_mm256_setzero_si256(), lO), PP, 0x80);
156164
// set all bits if outflank = 0, otherwise higher bits than outflank
157165
__m256i lE = _mm256_sub_epi64(_mm256_cmpeq_epi64(lO, _mm256_setzero_si256()), lO);
158166
// FF = _mm256_or_si256(FF, _mm256_andnot_si256(lE, lM));
159167
FF = _mm256_ternarylogic_epi64(FF, lE, lM, 0xf2);
160-
161-
#else // BLSMSK
162-
// __m256i t2 = _mm256_xor_si256(_mm256_add_epi64(lO, minusone), lO); // BLSMSK
163-
// t2 = _mm256_and_si256(lM, t2); // non-opponent LS1B and opponent in between
164-
__m256i t2 = _mm256_ternarylogic_epi64(lM, _mm256_add_epi64(lO, minusone), lO, 0x60);
165-
// apply flip if P is in mask, i.e. LS1B is P
166-
// FF = _mm256_mask_or_epi64(FF, _mm256_test_epi64_mask(PP, t2), FF, _mm256_andnot_si256(PP, t2));
167-
FF = _mm256_mask_ternarylogic_epi64(FF, _mm256_test_epi64_mask(PP, t2), PP, t2, 0xf2);
168168
#endif
169169

170170
return _mm_or_si128(_mm256_castsi256_si128(FF), _mm256_extracti128_si256(FF, 1));

0 commit comments

Comments
 (0)