Skip to content

Commit 5d47527

Browse files
committed
fix error + clearer error name
1 parent ab8b95c commit 5d47527

File tree

2 files changed

+55
-136
lines changed

2 files changed

+55
-136
lines changed

src/UTF8.cs

Lines changed: 2 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -133,21 +133,7 @@ public unsafe static void AdjustForSkippedBytes(byte* pInputBuffer,// int skippe
133133

134134
while (pos < inputLength)
135135
{
136-
// If the next 16 bytes are ascii, we can skip them.
137-
nextPos = pos + 16;
138-
if (nextPos <= inputLength)
139-
{ // if it is safe to read 16 more bytes, check that they are ascii
140-
ulong v1 = *(ulong*)pInputBuffer;
141-
ulong v2 = *(ulong*)(pInputBuffer + 8);
142-
ulong v = v1 | v2;
143-
144-
if ((v & 0x8080808080808080) == 0)
145-
{
146-
pos = nextPos;
147-
continue;
148-
}
149136

150-
}
151137

152138
byte firstByte = pInputBuffer[pos];
153139
while (firstByte < 0b10000000)
@@ -651,8 +637,8 @@ public unsafe static void AdjustForSkippedBytes(byte* pInputBuffer,// int skippe
651637
int candidateByte = pInputBuffer[processedLength + k];
652638
if ((candidateByte & 0b11000000) == 0b11000000)
653639
{
654-
// if (k != 0)
655-
if (true)
640+
if (k != 0)
641+
// if (true)
656642
{
657643
if ((candidateByte & 0b11100000) == 0b11000000) // Start of a 2-byte sequence
658644
{

test/UTF8ValidationTests.cs

Lines changed: 53 additions & 120 deletions
Original file line number | Diff line number | Diff line change
@@ -877,7 +877,7 @@ public void TooLargeError(Utf8ValidationDelegate utf8ValidationDelegate)
877877
utf8[i] += (byte)(((utf8[i] & 0b100) == 0b100) ? 0b10 : 0b100);
878878

879879
Assert.False(ValidateUtf8(utf8,utf8ValidationDelegate));
880-
Assert.True(InvalidateUtf8(utf8, i,utf8ValidationDelegate));
880+
Assert.True(InvalidateUtf8(utf8, i+1,utf8ValidationDelegate));
881881
ValidateCount(utf8,utf8ValidationDelegate);
882882
utf8[i] = old;
883883
}
@@ -926,8 +926,7 @@ public void TooLargeErrorAvx()
926926
}
927927

928928

929-
// TODO: improve this test
930-
public void TooLargeErrorAtEnd(Utf8ValidationDelegate utf8ValidationDelegate)
929+
public void AsciiPlusContinuationAtEndError(Utf8ValidationDelegate utf8ValidationDelegate)
931930
{
932931
foreach (int outputLength in outputLengths)
933932
{
@@ -941,7 +940,7 @@ public void TooLargeErrorAtEnd(Utf8ValidationDelegate utf8ValidationDelegate)
941940
generator.ReplaceEndOfArray(filler,toolong);
942941

943942
Assert.False(ValidateUtf8(filler,utf8ValidationDelegate));
944-
Assert.True(InvalidateUtf8(filler, outputLength -1,utf8ValidationDelegate));
943+
Assert.True(InvalidateUtf8(filler, filler.Length - 1,utf8ValidationDelegate));
945944
ValidateCount(filler,utf8ValidationDelegate);
946945
}
947946

@@ -952,47 +951,47 @@ public void TooLargeErrorAtEnd(Utf8ValidationDelegate utf8ValidationDelegate)
952951

953952
[Fact]
954953
[Trait("Category", "scalar")]
955-
public void TooLargeErrorAtEndScalar()
954+
public void AsciiPlusContinuationAtEndErrorScalar()
956955
{
957-
TooLargeErrorAtEnd(SimdUnicode.UTF8.GetPointerToFirstInvalidByteScalar);
956+
AsciiPlusContinuationAtEndError(SimdUnicode.UTF8.GetPointerToFirstInvalidByteScalar);
958957
}
959958

960959
// TODO:Uncomment when SSE is updated
961960
// [FactOnSystemRequirementAttribute(TestSystemRequirements.X64Sse)]
962961
// [Fact]
963962
// [Trait("Category", "sse")]
964-
// public void TooLargeErrorAtEndSse()
963+
// public void AsciiPlusContinuationAtEndErrorSse()
965964
// {
966-
// TooLargeErrorAtEnd(SimdUnicode.UTF8.GetPointerToFirstInvalidByteSse);
965+
// AsciiPlusContinuationAtEndError(SimdUnicode.UTF8.GetPointerToFirstInvalidByteSse);
967966
// }
968967

969968
// TODO:Uncomment when AVX512 is updated
970969
// [FactOnSystemRequirementAttribute(TestSystemRequirements.X64Avx512)]
971970
// [Trait("Category", "avx512")]
972-
// public void TooLargeErrorAtEndAvx512()
971+
// public void AsciiPlusContinuationAtEndErrorAvx512()
973972
// {
974-
// TooLargeErrorAtEnd(SimdUnicode.UTF8.GetPointerToFirstInvalidByteAvx512);
973+
// AsciiPlusContinuationAtEndError(SimdUnicode.UTF8.GetPointerToFirstInvalidByteAvx512);
975974
// }
976975

977976
// TODO:Uncomment when Arm64 is updated
978977
// [FactOnSystemRequirementAttribute(TestSystemRequirements.Arm64)]
979978
// [Trait("Category", "arm64")]
980-
// public void TooLargeErrorAtEndArm64()
979+
// public void AsciiPlusContinuationAtEndErrorArm64()
981980
// {
982-
// TooLargeErrorAtEnd(SimdUnicode.UTF8.GetPointerToFirstInvalidByteArm64);
981+
// AsciiPlusContinuationAtEndError(SimdUnicode.UTF8.GetPointerToFirstInvalidByteArm64);
983982
// }
984983

985984
[Fact]
986985
[Trait("Category", "avx")]
987-
public void TooLargeErrorAtEndAVX()
986+
public void AsciiPlusContinuationAtEndErrorAVX()
988987
{
989-
TooLargeErrorAtEnd(SimdUnicode.UTF8.GetPointerToFirstInvalidByteAvx2);
988+
AsciiPlusContinuationAtEndError(SimdUnicode.UTF8.GetPointerToFirstInvalidByteAvx2);
990989
}
991990

992991
[Fact]
993-
public void TooLargeErrorAtEndAvx2()
992+
public void AsciiPlusContinuationAtEndErrorAvx2()
994993
{
995-
TooLargeErrorAtEnd(SimdUnicode.UTF8.GetPointerToFirstInvalidByteAvx2);
994+
AsciiPlusContinuationAtEndError(SimdUnicode.UTF8.GetPointerToFirstInvalidByteAvx2);
996995
}
997996

998997
public void SurrogateErrorTest(Utf8ValidationDelegate utf8ValidationDelegate)
@@ -1268,26 +1267,6 @@ private bool ValidateUtf8(byte[] utf8,Utf8ValidationDelegate utf8ValidationDeleg
12681267
}
12691268
}
12701269

1271-
// void PrintDebugInfo(byte* failedByte, byte* startPtr, byte[] utf8, string source)
1272-
// {
1273-
// int failedIndex = (int)(failedByte - startPtr);
1274-
// byte failedByteValue = *failedByte;
1275-
// Console.WriteLine($"Failure in {source}: Index {failedIndex}, Byte {failedByteValue:X2}");
1276-
1277-
// // Print surrounding sequence, assuming 5 bytes context around the failure point
1278-
// int contextRadius = 5;
1279-
// int startContext = Math.Max(0, failedIndex - contextRadius);
1280-
// int endContext = Math.Min(utf8.Length, failedIndex + contextRadius + 1); // Include the failed byte and some after
1281-
// Console.Write("Sequence around failure point: ");
1282-
// for (int i = startContext; i < endContext; i++)
1283-
// {
1284-
// Console.Write($"{utf8[i]:X2} ");
1285-
// }
1286-
// Console.WriteLine();
1287-
// }
1288-
1289-
1290-
12911270
// Helper method to calculate the actual offset and length from a Range
12921271
private (int offset, int length) GetOffsetAndLength(int totalLength, Range range)
12931272
{
@@ -1301,47 +1280,6 @@ private bool ValidateUtf8(byte[] utf8,Utf8ValidationDelegate utf8ValidationDeleg
13011280
// Define a delegate that matches the signature of the methods you want to test
13021281
public unsafe delegate byte* Utf8ValidationDelegate(byte* pInputBuffer, int inputLength, out int utf16CodeUnitCountAdjustment, out int scalarCountAdjustment);
13031282

1304-
1305-
1306-
1307-
// public void ValidateCount(byte[] utf8,Utf8ValidationDelegate utf8ValidationDelegate, Range range = default)
1308-
// {
1309-
// int DotnetUtf16Adjustment, DotnetScalarCountAdjustment;
1310-
// int SimdUnicodeUtf16Adjustment, SimdUnicodeScalarCountAdjustment;
1311-
1312-
// var isDefaultRange = range.Equals(default(Range));
1313-
// var (offset, length) = isDefaultRange ? (0, utf8.Length) : GetOffsetAndLength(utf8.Length, range);
1314-
1315-
// unsafe
1316-
// {
1317-
// fixed (byte* pInput = utf8)
1318-
// {
1319-
// byte* startPtr = pInput + offset;
1320-
// // Invoke the method under test.
1321-
1322-
// DotnetUtf16Adjustment= 0;
1323-
// DotnetScalarCountAdjustment= 0;
1324-
// DotnetRuntime.Utf8Utility.GetPointerToFirstInvalidByte(pInput, length, out DotnetUtf16Adjustment, out DotnetScalarCountAdjustment);
1325-
1326-
// SimdUnicodeUtf16Adjustment= 0;
1327-
// SimdUnicodeScalarCountAdjustment= 0;
1328-
// utf8ValidationDelegate(pInput, length, out SimdUnicodeUtf16Adjustment, out SimdUnicodeScalarCountAdjustment);
1329-
1330-
// // Console.WriteLine("DotnetScalar:" + DotnetScalarCountAdjustment);
1331-
// // Console.WriteLine("OurScalar:" + SimdUnicodeScalarCountAdjustment);
1332-
1333-
// // Console.WriteLine("Lenght:" + utf8.Length);
1334-
// // Console.WriteLine("Dotnetutf16:" + DotnetUtf16Adjustment);
1335-
// // Console.WriteLine("Ourutf16:" + SimdUnicodeUtf16Adjustment);
1336-
// // Console.WriteLine("___________________________________________________");
1337-
1338-
// Assert.True(DotnetUtf16Adjustment == SimdUnicodeUtf16Adjustment, $"Expected UTF16 Adjustment: {DotnetUtf16Adjustment}, but got: {SimdUnicodeUtf16Adjustment}.");
1339-
// Assert.True(DotnetScalarCountAdjustment == SimdUnicodeScalarCountAdjustment, $"Expected Scalar Count Adjustment: {DotnetScalarCountAdjustment}, but got: {SimdUnicodeScalarCountAdjustment}.");
1340-
// }
1341-
// }
1342-
// // }
1343-
// }
1344-
13451283
public void ValidateCount(byte[] utf8, Utf8ValidationDelegate utf8ValidationDelegate, Range range = default)
13461284
{
13471285
int DotnetUtf16Adjustment, DotnetScalarCountAdjustment;
@@ -1384,60 +1322,55 @@ public void ValidateCount(byte[] utf8, Utf8ValidationDelegate utf8ValidationDele
13841322
}
13851323

13861324

1387-
[Fact]
1388-
[Trait("Category", "Scalar")]
1389-
public void DotnetUTF16Count()
1390-
{
1391-
int[] outputLengths = { 10, 15, 11,12 ,15,15,1, 3, 5, 8, 10, 12, 15, 18 };
1392-
int DotnetUtf16Adjustment, DotnetScalarCountAdjustment;
1393-
int SimdUnicodeUtf16Adjustment, SimdUnicodeScalarCountAdjustment;
1394-
1395-
1396-
foreach (int outputLength in outputLengths)
1397-
{
1398-
// Generate a UTF-8 sequence with 3 units, each 2 bytes long, presumed to be valid.
1399-
// byte[] utf8 = generator.Generate(howManyUnits: 11, byteCountInUnit: 3).ToArray();
1400-
byte[] utf8 = generator.Generate(howManyUnits: outputLength).ToArray();
1401-
PrintHexAndBinary(utf8);
1402-
var (offset, length) = (0, utf8.Length);
1403-
1404-
unsafe
1405-
{
1406-
fixed (byte* pInput = utf8)
1407-
{
1408-
byte* startPtr = pInput + offset;
1409-
// Invoke the method under test.
1325+
// [Fact]
1326+
// [Trait("Category", "Scalar")]
1327+
// public void DotnetUTF16Count()
1328+
// {
1329+
// int[] outputLengths = { 10, 15, 11,12 ,15,15,1, 3, 5, 8, 10, 12, 15, 18 };
1330+
// int DotnetUtf16Adjustment, DotnetScalarCountAdjustment;
1331+
// int SimdUnicodeUtf16Adjustment, SimdUnicodeScalarCountAdjustment;
14101332

1411-
DotnetUtf16Adjustment= 0;
1412-
DotnetScalarCountAdjustment= 0;
1413-
DotnetRuntime.Utf8Utility.GetPointerToFirstInvalidByte(pInput, length, out DotnetUtf16Adjustment, out DotnetScalarCountAdjustment);
14141333

1415-
SimdUnicodeUtf16Adjustment= 0;
1416-
SimdUnicodeScalarCountAdjustment= 0;
1417-
SimdUnicode.UTF8.GetPointerToFirstInvalidByteScalar(pInput, length, out SimdUnicodeUtf16Adjustment, out SimdUnicodeScalarCountAdjustment);
1334+
// foreach (int outputLength in outputLengths)
1335+
// {
1336+
// // Generate a UTF-8 sequence with 3 units, each 2 bytes long, presumed to be valid.
1337+
// // byte[] utf8 = generator.Generate(howManyUnits: 11, byteCountInUnit: 3).ToArray();
1338+
// byte[] utf8 = generator.Generate(howManyUnits: outputLength).ToArray();
1339+
// PrintHexAndBinary(utf8);
1340+
// var (offset, length) = (0, utf8.Length);
14181341

1419-
Console.WriteLine("Lenght:" + utf8.Length);
1342+
// unsafe
1343+
// {
1344+
// fixed (byte* pInput = utf8)
1345+
// {
1346+
// byte* startPtr = pInput + offset;
1347+
// // Invoke the method under test.
14201348

1421-
Console.WriteLine("DotnetScalar:" + DotnetScalarCountAdjustment);
1422-
Console.WriteLine("OurScalar:" + SimdUnicodeScalarCountAdjustment);
1349+
// DotnetUtf16Adjustment= 0;
1350+
// DotnetScalarCountAdjustment= 0;
1351+
// DotnetRuntime.Utf8Utility.GetPointerToFirstInvalidByte(pInput, length, out DotnetUtf16Adjustment, out DotnetScalarCountAdjustment);
14231352

1424-
Console.WriteLine("Dotnetutf16:" + DotnetUtf16Adjustment);
1425-
Console.WriteLine("Ourutf16:" + SimdUnicodeUtf16Adjustment);
1426-
Console.WriteLine("___________________________________________________");
1353+
// SimdUnicodeUtf16Adjustment= 0;
1354+
// SimdUnicodeScalarCountAdjustment= 0;
1355+
// SimdUnicode.UTF8.GetPointerToFirstInvalidByteScalar(pInput, length, out SimdUnicodeUtf16Adjustment, out SimdUnicodeScalarCountAdjustment);
14271356

1357+
// Console.WriteLine("Lenght:" + utf8.Length);
14281358

1429-
Assert.True(DotnetUtf16Adjustment == SimdUnicodeUtf16Adjustment, $"Expected UTF16 Adjustment: {DotnetUtf16Adjustment}, but got: {SimdUnicodeUtf16Adjustment}.");
1430-
Assert.True(DotnetScalarCountAdjustment == SimdUnicodeScalarCountAdjustment, $"Expected Scalar Count Adjustment: {DotnetScalarCountAdjustment}, but got: {SimdUnicodeScalarCountAdjustment}.");
1359+
// Console.WriteLine("DotnetScalar:" + DotnetScalarCountAdjustment);
1360+
// Console.WriteLine("OurScalar:" + SimdUnicodeScalarCountAdjustment);
14311361

1362+
// Console.WriteLine("Dotnetutf16:" + DotnetUtf16Adjustment);
1363+
// Console.WriteLine("Ourutf16:" + SimdUnicodeUtf16Adjustment);
1364+
// Console.WriteLine("___________________________________________________");
14321365

14331366

1367+
// Assert.True(DotnetUtf16Adjustment == SimdUnicodeUtf16Adjustment, $"Expected UTF16 Adjustment: {DotnetUtf16Adjustment}, but got: {SimdUnicodeUtf16Adjustment}.");
1368+
// Assert.True(DotnetScalarCountAdjustment == SimdUnicodeScalarCountAdjustment, $"Expected Scalar Count Adjustment: {DotnetScalarCountAdjustment}, but got: {SimdUnicodeScalarCountAdjustment}.");
14341369

1435-
// If your generator creates specific patterns or the utility calculates these adjustments differently,
1436-
// you'll need to adjust the expected values accordingly.
1437-
}
1438-
}
1439-
}
1440-
}
1370+
// }
1371+
// }
1372+
// }
1373+
// }
14411374

14421375

14431376
}

0 commit comments

Comments
 (0)