NoerrortestAVX working

Nick-Nuon · Nick-Nuon · commit c5e400437d38 · 2024-04-26T06:31:44.000-04:00
diff --git a/src/UTF8.cs b/src/UTF8.cs
@@ -500,6 +500,9 @@ public static class UTF8
             int TailScalarCodeUnitCountAdjustment = 0;
             int TailUtf16CodeUnitCountAdjustment = 0;
 
+            bool prevWasSimd = false;
+
+
             if (pInputBuffer == null || inputLength <= 0)
             {
                 utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment;
@@ -606,7 +609,6 @@ public static class UTF8
                     Vector256<byte> v0f = Vector256.Create((byte)0x0F);
                     Vector256<byte> v80 = Vector256.Create((byte)0x80);
 
-                    bool prevWasSimd = false;
 
                     for (; processedLength + 32 <= inputLength; processedLength += 32)
                     {
@@ -938,6 +940,29 @@ public static class UTF8
                 Console.WriteLine("TempUTF16 after tail remaining check:"+ TempUtf16CodeUnitCountAdjustment);
                 Console.WriteLine("TempScalar '' '' '':"+ TempScalarCountAdjustment);
 
+            } else if (processedLength == inputLength && prevWasSimd){
+                for(int k = 0; k < 3; k++)
+                {
+                // There is no error here, hence the loop is straightforward and we avoid double-counting every byte
+                    int candidateByte = pInputBuffer[processedLength - k];
+                    if ((candidateByte & 0b11000000) == 0b11000000)
+                    {
+                        if ((candidateByte & 0b11100000) == 0b11000000) // Start of a 2-byte sequence
+                        {
+                            TempUtf16CodeUnitCountAdjustment -= 1; 
+                        }
+                        if ((candidateByte & 0b11110000) == 0b11100000) // Start of a 3-byte sequence
+                        {
+                            TempUtf16CodeUnitCountAdjustment -= 2; 
+                        }
+                        if ((candidateByte & 0b11111000) == 0b11110000) // Start of a 4-byte sequence
+                        {
+                            TempUtf16CodeUnitCountAdjustment -= 2;
+                            TempScalarCountAdjustment -= 1;
+                        }
+                        break;
+                    }
+                }
             }
 
             utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment + TailUtf16CodeUnitCountAdjustment;
diff --git a/test/UTF8ValidationTests.cs b/test/UTF8ValidationTests.cs
@@ -325,8 +325,17 @@ private void RunTestForByteLength(int byteLength,Utf8ValidationDelegate utf8Vali
             {
                 byte[] utf8 = generator.Generate(outputLength, byteLength).ToArray();
                 bool isValidUtf8 = ValidateUtf8(utf8,utf8ValidationDelegate);
-                Assert.True(isValidUtf8, $"Failure for {byteLength}-byte UTF8 of length {outputLength} in trial {trial}");
-                ValidateCount(utf8,utf8ValidationDelegate);
+                try
+                {
+                    Assert.True(isValidUtf8, $"Failure NoErrorTest. ");
+                    ValidateCount(utf8,utf8ValidationDelegate);
+                }
+                catch (Xunit.Sdk.XunitException)
+                {
+                    Console.WriteLine($"Test failed for {byteLength}-byte unit ");
+                    PrintHexAndBinary(utf8);
+                    throw; // Rethrow the exception to fail the test.
+                }
             }
         }
     }

Original file line number	Diff line number	Diff line change
`@@ -325,8 +325,17 @@ private void RunTestForByteLength(int byteLength,Utf8ValidationDelegate utf8Vali`
`325`	`325`	`{`
`326`	`326`	`byte[] utf8 = generator.Generate(outputLength, byteLength).ToArray();`
`327`	`327`	`bool isValidUtf8 = ValidateUtf8(utf8,utf8ValidationDelegate);`
`328`		`- Assert.True(isValidUtf8, $"Failure for {byteLength}-byte UTF8 of length {outputLength} in trial {trial}");`
`329`		`- ValidateCount(utf8,utf8ValidationDelegate);`
	`328`	`+ try`
	`329`	`+ {`
	`330`	`+ Assert.True(isValidUtf8, $"Failure NoErrorTest. ");`
	`331`	`+ ValidateCount(utf8,utf8ValidationDelegate);`
	`332`	`+ }`
	`333`	`+ catch (Xunit.Sdk.XunitException)`
	`334`	`+ {`
	`335`	`+ Console.WriteLine($"Test failed for {byteLength}-byte unit ");`
	`336`	`+ PrintHexAndBinary(utf8);`
	`337`	`+ throw; // Rethrow the exception to fail the test.`
	`338`	`+ }`
`330`	`339`	`}`
`331`	`340`	`}`
`332`	`341`	`}`