@@ -671,22 +671,15 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
671
671
contbytes += tempcont ;
672
672
}
673
673
674
- // (Nick Nuon)The counts for continuous bytes can probably be optimized:
675
- // The draft had something like this line:
676
- // contbytes += (int)Popcnt.PopCount((uint)Avx2.MoveMask(sc));
677
- // this actually counts the number of 2 consecutive continuous bytes
678
- // I put something that was bound to be working regardless as a slow but temporary fix:
679
-
680
- Vector256 < byte > top2bits = Vector256 . Create ( ( byte ) 0b11000000 ) ; // Mask to isolate the two most significant bits
681
- Vector256 < byte > contbytemask = Vector256 . Create ( ( byte ) 0b10000000 ) ; // The expected pattern for continuation bytes: 10xxxxxx
682
-
683
- // Apply the mask and compare
684
- Vector256 < byte > maskedData = Avx2 . And ( currentBlock , top2bits ) ;
685
- Vector256 < byte > compareResult = Avx2 . CompareEqual ( maskedData , contbytemask ) ;
686
- // Move mask to get integer representation
687
- contbytes += ( int ) Popcnt . PopCount ( ( uint ) Avx2 . MoveMask ( compareResult ) ) ;
688
-
689
-
674
+ // We update the continuation bytes count using just one SIMD instruction (Avx2.CompareGreaterThan).
675
+ // Then we need popcount to count the number of continuation bytes and some arithmetic operations.
676
+ // We use the fact that as two's complement, -65 is 0b10111111, so we can use CompareGreaterThan
677
+ // to find continuation bytes: any byte greater than -65 is not a continuation byte. E.g., the next one
678
+ // is 0b11000000 (-64) and so forth. The smallest possible value is -128, which is 0b10000000.
679
+
680
+ Vector256 < sbyte > largestcont = Vector256 . Create ( ( sbyte ) - 65 ) ; // -65 => 0b10111111
681
+ uint noncont = ( uint ) Avx2 . MoveMask ( Avx2 . CompareGreaterThan ( Vector256 . AsSByte ( currentBlock ) , largestcont ) ) ;
682
+ contbytes += ( int ) ( 32 - Popcnt . PopCount ( noncont ) ) ;
690
683
691
684
// We use two instructions (SubtractSaturate and MoveMask) to update n4, with one arithmetic operation.
692
685
n4 += ( int ) Popcnt . PopCount ( ( uint ) Avx2 . MoveMask ( Avx2 . SubtractSaturate ( currentBlock , fourthByte ) ) ) ;
0 commit comments