@@ -128,43 +128,43 @@ __m128i vectorcall mm_Flip(const __m128i OP, int pos)
128128
129129 // right: look for player (or edge) bit with lzcnt
130130 __m256i rM = lrmask [pos ].v4 [0 ];
131- #if 1 // use prove
131+ #if 1 // use mask by acepck
132+ __m256i rP = _mm256_and_si256 (PP , rM );
133+ // shadow mask lower than leftmost P
134+ __m256i t0 = _mm256_srlv_epi64 (minusone , _mm256_lzcnt_epi64 (rP ));
135+ // apply flip if leftmost non-opponent is P
136+ // __m256i rE = _mm256_andnot_si256(_mm256_or_si256(OO, PP), rM);
137+ __m256i rE = _mm256_ternarylogic_epi64 (OO , PP , rM , 0x02 ); // masked empty
138+ // __m256i FF = _mm256_maskz_andnot_epi64(_mm256_cmpgt_epi64_mask(rP, rE), t0, rM);
139+ __m256i FF = _mm256_ternarylogic_epi64 (_mm256_cmpgt_epi64 (rP , rE ), t0 , rM , 0x20 );
140+
141+ #else // use prove
132142 __m256i msb = _mm256_slli_epi64 (minusone , 63 );
133143 // outflank P, or 0 if leftmost non-opponent is empty
134144 __m256i t0 = _mm256_and_si256 (_mm256_srlv_epi64 (msb , _mm256_lzcnt_epi64 (_mm256_andnot_si256 (OO , rM ))), PP );
135145 // clear masked OO lower than outflank
136146 // __m256i FF = _mm256_andnot_si256(_mm256_add_epi64(t0, minusone), _mm256_and_si256(OO, rM));
137147 __m256i FF = _mm256_ternarylogic_epi64 (_mm256_add_epi64 (t0 , minusone ), OO , rM , 0x08 );
138-
139- #else // use mask by acepck
140- __m256i rP = _mm256_and_si256 (PP , rM );
141- // shadow mask lower than leftmost P
142- __m256i t0 = _mm256_srlv_epi64 (minusone , _mm256_lzcnt_epi64 (rP ));
143- // apply flip if leftmost non-opponent is P
144- // __m256i rE = _mm256_andnot_si256(OO, _mm256_andnot_si256(rP, rM));
145- __m256i rE = _mm256_ternarylogic_epi64 (OO , rM , rP , 0x04 ); // masked empty
146- // __m256i FF = _mm256_and_si256(_mm256_cmpgt_epi64(rP, rE), _mm256_andnot_epi64(t0, rM));
147- __m256i FF = _mm256_ternarylogic_epi64 (_mm256_cmpgt_epi64 (rP , rE ), t0 , rM , 0x20 );
148148 #endif
149149 // left: look for non-opponent LS1B
150150 __m256i lM = lrmask [pos ].v4 [1 ];
151151 __m256i lO = _mm256_andnot_si256 (OO , lM );
152- #if 1 // LS1B
152+ #if 1 // BLSMSK
153+ // __m256i t2 = _mm256_xor_si256(_mm256_add_epi64(lO, minusone), lO); // BLSMSK
154+ // t2 = _mm256_and_si256(lM, t2); // non-opponent LS1B and opponent inbetween
155+ __m256i t2 = _mm256_ternarylogic_epi64 (lM , _mm256_add_epi64 (lO , minusone ), lO , 0x60 );
156+ // apply flip if P is in mask, i.e. LS1B is P
157+ // FF = _mm256_mask_or_epi64(FF, _mm256_test_epi64_mask(PP, t2), FF, _mm256_andnot_si256(PP, t2));
158+ FF = _mm256_mask_ternarylogic_epi64 (FF , _mm256_test_epi64_mask (PP , t2 ), PP , t2 , 0xf2 );
159+
160+ #else // LS1B
153161 // lO = _mm256_and_si256(lO, _mm256_sub_epi64(_mm256_setzero_si256(), lO)); // LS1B
154162 // lO = _mm256_and_si256(lO, PP);
155163 lO = _mm256_ternarylogic_epi64 (lO , _mm256_sub_epi64 (_mm256_setzero_si256 (), lO ), PP , 0x80 );
156164 // set all bits if outflank = 0, otherwise higher bits than outflank
157165 __m256i lE = _mm256_sub_epi64 (_mm256_cmpeq_epi64 (lO , _mm256_setzero_si256 ()), lO );
158166 // FF = _mm256_or_si256(FF, _mm256_andnot_si256(lE, lM));
159167 FF = _mm256_ternarylogic_epi64 (FF , lE , lM , 0xf2 );
160-
161- #else // BLSMSK
162- // __m256i t2 = _mm256_xor_si256(_mm256_add_epi64(lO, minusone), lO); // BLSMSK
163- // t2 = _mm256_and_si256(lM, t2); // non-opponent LS1B and opponent inbetween
164- __m256i t2 = _mm256_ternarylogic_epi64 (lM , _mm256_add_epi64 (lO , minusone ), lO , 0x60 );
165- // apply flip if P is in mask, i.e. LS1B is P
166- // FF = _mm256_mask_or_epi64(FF, _mm256_test_epi64_mask(PP, t2), FF, _mm256_andnot_si256(PP, t2));
167- FF = _mm256_mask_ternarylogic_epi64 (FF , _mm256_test_epi64_mask (PP , t2 ), PP , t2 , 0xf2 );
168168 #endif
169169
170170 return _mm_or_si128 (_mm256_castsi256_si128 (FF ), _mm256_extracti128_si256 (FF , 1 ));
0 commit comments