Skip to content

Commit c5e4004

Browse files
committed
NoerrortestAVX working
1 parent f3f2f9d commit c5e4004

File tree

2 files changed

+37
-3
lines changed

2 files changed

+37
-3
lines changed

src/UTF8.cs

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -500,6 +500,9 @@ public static class UTF8
500500
int TailScalarCodeUnitCountAdjustment = 0;
501501
int TailUtf16CodeUnitCountAdjustment = 0;
502502

503+
bool prevWasSimd = false;
504+
505+
503506
if (pInputBuffer == null || inputLength <= 0)
504507
{
505508
utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment;
@@ -606,7 +609,6 @@ public static class UTF8
606609
Vector256<byte> v0f = Vector256.Create((byte)0x0F);
607610
Vector256<byte> v80 = Vector256.Create((byte)0x80);
608611

609-
bool prevWasSimd = false;
610612

611613
for (; processedLength + 32 <= inputLength; processedLength += 32)
612614
{
@@ -938,6 +940,29 @@ public static class UTF8
938940
Console.WriteLine("TempUTF16 after tail remaining check:"+ TempUtf16CodeUnitCountAdjustment);
939941
Console.WriteLine("TempScalar '' '' '':"+ TempScalarCountAdjustment);
940942

943+
} else if (processedLength == inputLength && prevWasSimd){
944+
for(int k = 0; k < 3; k++)
945+
{
946+
// There is no error here, hence the loop is straightforward and we avoid double counting every byte
947+
int candidateByte = pInputBuffer[processedLength - k];
948+
if ((candidateByte & 0b11000000) == 0b11000000)
949+
{
950+
if ((candidateByte & 0b11100000) == 0b11000000) // Start of a 2-byte sequence
951+
{
952+
TempUtf16CodeUnitCountAdjustment -= 1;
953+
}
954+
if ((candidateByte & 0b11110000) == 0b11100000) // Start of a 3-byte sequence
955+
{
956+
TempUtf16CodeUnitCountAdjustment -= 2;
957+
}
958+
if ((candidateByte & 0b11111000) == 0b11110000) // Start of a 4-byte sequence
959+
{
960+
TempUtf16CodeUnitCountAdjustment -= 2;
961+
TempScalarCountAdjustment -= 1;
962+
}
963+
break;
964+
}
965+
}
941966
}
942967

943968
utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment + TailUtf16CodeUnitCountAdjustment;

test/UTF8ValidationTests.cs

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -325,8 +325,17 @@ private void RunTestForByteLength(int byteLength,Utf8ValidationDelegate utf8Vali
325325
{
326326
byte[] utf8 = generator.Generate(outputLength, byteLength).ToArray();
327327
bool isValidUtf8 = ValidateUtf8(utf8,utf8ValidationDelegate);
328-
Assert.True(isValidUtf8, $"Failure for {byteLength}-byte UTF8 of length {outputLength} in trial {trial}");
329-
ValidateCount(utf8,utf8ValidationDelegate);
328+
try
329+
{
330+
Assert.True(isValidUtf8, $"Failure NoErrorTest. ");
331+
ValidateCount(utf8,utf8ValidationDelegate);
332+
}
333+
catch (Xunit.Sdk.XunitException)
334+
{
335+
Console.WriteLine($"Test failed for {byteLength}-byte unit ");
336+
PrintHexAndBinary(utf8);
337+
throw; // Rethrow the exception to fail the test.
338+
}
330339
}
331340
}
332341
}

0 commit comments

Comments
 (0)