Skip to content

Commit bf4f6b3

Browse files
author
Daniel Lemire
committed
fix: optimize the avx2 validator (shaving one SIMD instruction).
1 parent 6f34ead commit bf4f6b3

File tree

1 file changed

+9
-16
lines changed

1 file changed

+9
-16
lines changed

src/UTF8.cs

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -671,22 +671,15 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
671671
contbytes += tempcont;
672672
}
673673

674-
// (Nick Nuon)The counts for continuous bytes can probably be optimized:
675-
// The draft had something like this line:
676-
// contbytes += (int)Popcnt.PopCount((uint)Avx2.MoveMask(sc));
677-
// this actually counts the number of 2 consecutive continuous bytes
678-
// I put something that was bound to be working regardless as a slow but temporary fix:
679-
680-
Vector256<byte> top2bits = Vector256.Create((byte)0b11000000); // Mask to isolate the two most significant bits
681-
Vector256<byte> contbytemask = Vector256.Create((byte)0b10000000); // The expected pattern for continuation bytes: 10xxxxxx
682-
683-
// Apply the mask and compare
684-
Vector256<byte> maskedData = Avx2.And(currentBlock, top2bits);
685-
Vector256<byte> compareResult = Avx2.CompareEqual(maskedData, contbytemask);
686-
// Move mask to get integer representation
687-
contbytes += (int)Popcnt.PopCount((uint)Avx2.MoveMask(compareResult));
688-
689-
674+
// We update the continuation bytes count using just one SIMD instruction (Avx2.CompareGreaterThan).
675+
// Then we need popcount to count the number of continuation bytes and some arithmetic operations.
676+
// We use the fact that, in two's complement, -65 is 0b10111111, so we can use CompareGreaterThan
677+
// to find continuation bytes: any byte greater than -65 is not a continuation byte. E.g., the next one
678+
// is 0b11000000 (-64) and so forth. The smallest possible value is -128, which is 0b10000000.
679+
680+
Vector256<sbyte> largestcont = Vector256.Create((sbyte)-65); // -65 => 0b10111111
681+
uint noncont = (uint)Avx2.MoveMask(Avx2.CompareGreaterThan(Vector256.AsSByte(currentBlock), largestcont));
682+
contbytes += (int)(32-Popcnt.PopCount(noncont));
690683

691684
// We use two instructions (SubtractSaturate and MoveMask) to update n4, with one arithmetic operation.
692685
n4 += (int)Popcnt.PopCount((uint)Avx2.MoveMask(Avx2.SubtractSaturate(currentBlock, fourthByte)));

0 commit comments

Comments
 (0)