Skip to content

Commit 5a99bb2

Browse files
authored
Merge pull request #36 from simdutf/optimize_avx2
fix: optimize the avx2 validator (shaving one SIMD instruction).
2 parents e5f5b39 + 71b6fe2 commit 5a99bb2

File tree

1 file changed

+1
-17
lines changed

1 file changed

+1
-17
lines changed

src/UTF8.cs

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -671,23 +671,7 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
671671
contbytes += tempcont;
672672
}
673673

674-
// (Nick Nuon) The count of continuation bytes can probably be optimized:
675-
// The draft had something like this line:
676-
// contbytes += (int)Popcnt.PopCount((uint)Avx2.MoveMask(sc));
677-
// this actually counts the number of 2 consecutive continuation bytes
678-
// I put in something that is bound to work regardless, as a slow but temporary fix:
679-
680-
Vector256<byte> top2bits = Vector256.Create((byte)0b11000000); // Mask to isolate the two most significant bits
681-
Vector256<byte> contbytemask = Vector256.Create((byte)0b10000000); // The expected pattern for continuation bytes: 10xxxxxx
682-
683-
// Apply the mask and compare
684-
Vector256<byte> maskedData = Avx2.And(currentBlock, top2bits);
685-
Vector256<byte> compareResult = Avx2.CompareEqual(maskedData, contbytemask);
686-
// Move mask to get integer representation
687-
contbytes += (int)Popcnt.PopCount((uint)Avx2.MoveMask(compareResult));
688-
689-
690-
674+
contbytes += (int)Popcnt.PopCount((uint)Avx2.MoveMask(byte_2_high));
691675
// We use two instructions (SubtractSaturate and MoveMask) to update n4, with one arithmetic operation.
692676
n4 += (int)Popcnt.PopCount((uint)Avx2.MoveMask(Avx2.SubtractSaturate(currentBlock, fourthByte)));
693677
}

0 commit comments

Comments
 (0)