Skip to content

Commit fe73718

Browse files
committed
add a validate count to tests
1 parent ea0f6c7 commit fe73718

File tree

2 files changed

+25
-10
lines changed

2 files changed

+25
-10
lines changed

src/UTF8.cs

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ public static class UTF8
1717
for (int i = 0; i <= howFarBack; i++)
1818
{
1919
byte b = buf[0 - i];
20-
foundLeadingBytes = ((b & 0b11000000) != 0b10000000);
20+
foundLeadingBytes = (b & 0b11000000) != 0b10000000;
2121
if (foundLeadingBytes)
2222
{
2323
buf -= i;
@@ -647,22 +647,23 @@ public unsafe static void AdjustForSkippedBytes(byte* pInputBuffer,// int skippe
647647
int candidateByte = pInputBuffer[processedLength + k];
648648
if ((pInputBuffer[processedLength + k] & 0b11000000) == 0b11000000)
649649
{
650-
if ((candidateByte & 0b11110000) == 0b11100000) // Start of a 3-byte sequence
650+
if ((candidateByte & 0b11100000) == 0b11000000) // Start of a 2-byte sequence
651651
{
652-
TempUtf16CodeUnitCountAdjustment += 1; // Still adjusts for a single UTF-16 unit
652+
TempUtf16CodeUnitCountAdjustment += 1;
653653
}
654-
if ((candidateByte & 0b11111000) == 0b11110000) // Start of a 4-byte sequence
654+
if ((candidateByte & 0b11110000) == 0b11100000) // Start of a 3-byte sequence
655655
{
656-
TempUtf16CodeUnitCountAdjustment += 1; // Adjusts for two UTF-16 units (surrogate pair)
657-
TempScalarCountAdjustment += 1; // Adjust for one scalar value
656+
TempUtf16CodeUnitCountAdjustment += 2;
658657
}
659-
if ((candidateByte & 0b11100000) == 0b11000000) // Start of a 2-byte sequence
658+
if ((candidateByte & 0b11111000) == 0b11110000) // Start of a 4-byte sequence
660659
{
661-
TempUtf16CodeUnitCountAdjustment += 1; // Adjust for a single UTF-16 unit
660+
TempUtf16CodeUnitCountAdjustment += 2;
661+
TempScalarCountAdjustment += 1;
662662
}
663663

664+
664665
processedLength += k;
665-
break;
666+
// break;
666667

667668
}
668669

@@ -677,7 +678,7 @@ public unsafe static void AdjustForSkippedBytes(byte* pInputBuffer,// int skippe
677678
// Process the remaining bytes with the scalar function
678679
if (processedLength < inputLength)
679680
{
680-
// We need to possibly backtrack to the start of the last code point
681+
// // We need to possibly backtrack to the start of the last code point
681682
while (processedLength > 0 && (sbyte)pInputBuffer[processedLength] <= -65)
682683
{
683684
processedLength -= 1;

test/UTF8ValidationTests.cs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,7 @@ public void BadSequences(Utf8ValidationDelegate utf8ValidationDelegate)
188188
fixed (byte* pInput = input)
189189
{
190190
ValidateUtf8(input,utf8ValidationDelegate);
191+
ValidateCount(input,utf8ValidationDelegate);
191192
}
192193
}
193194
}
@@ -249,6 +250,7 @@ public void NoError(Utf8ValidationDelegate utf8ValidationDelegate)
249250
bool isValidUtf8 = ValidateUtf8(utf8,utf8ValidationDelegate);
250251
string utf8HexString = BitConverter.ToString(utf8).Replace("-", " ");
251252
Assert.True(isValidUtf8, $"Failure NoErrorTest. Sequence: {utf8HexString}");
253+
ValidateCount(utf8,utf8ValidationDelegate);
252254
}
253255
}
254256
}
@@ -310,6 +312,7 @@ private void RunTestForByteLength(int byteLength,Utf8ValidationDelegate utf8Vali
310312
byte[] utf8 = generator.Generate(outputLength, byteLength).ToArray();
311313
bool isValidUtf8 = ValidateUtf8(utf8,utf8ValidationDelegate);
312314
Assert.True(isValidUtf8, $"Failure for {byteLength}-byte UTF8 of length {outputLength} in trial {trial}");
315+
ValidateCount(utf8,utf8ValidationDelegate);
313316
}
314317
}
315318
}
@@ -369,6 +372,7 @@ public void BadHeaderBits(Utf8ValidationDelegate utf8ValidationDelegate)
369372
utf8[i] = 0b11111000; // Forcing a header bits error
370373
Assert.False(ValidateUtf8(utf8,utf8ValidationDelegate));
371374
Assert.True(InvalidateUtf8(utf8, i,utf8ValidationDelegate));
375+
ValidateCount(utf8,utf8ValidationDelegate);
372376
utf8[i] = oldByte; // Restore the original byte
373377
}
374378
}
@@ -432,6 +436,7 @@ public void TooShortError(Utf8ValidationDelegate utf8ValidationDelegate)
432436
utf8[i] = 0b11100000; // Forcing a too short error
433437
Assert.False(ValidateUtf8(utf8,utf8ValidationDelegate));
434438
Assert.True(InvalidateUtf8(utf8, i,utf8ValidationDelegate));
439+
ValidateCount(utf8,utf8ValidationDelegate);
435440
utf8[i] = oldByte; // Restore the original byte
436441
}
437442
}
@@ -498,6 +503,7 @@ public void TooLongError(Utf8ValidationDelegate utf8ValidationDelegate)
498503
utf8[i] = 0b10000000; // Forcing a too long error
499504
Assert.False(ValidateUtf8(utf8,utf8ValidationDelegate));
500505
Assert.True(InvalidateUtf8(utf8, i,utf8ValidationDelegate));
506+
ValidateCount(utf8,utf8ValidationDelegate);
501507
utf8[i] = oldByte; // Restore the original byte
502508
}
503509
}
@@ -577,6 +583,7 @@ public void OverlongError(Utf8ValidationDelegate utf8ValidationDelegate)
577583

578584
Assert.False(ValidateUtf8(utf8,utf8ValidationDelegate));
579585
Assert.True(InvalidateUtf8(utf8, i,utf8ValidationDelegate));
586+
ValidateCount(utf8,utf8ValidationDelegate);
580587

581588
utf8[i] = old;
582589
utf8[i + 1] = secondOld;
@@ -665,6 +672,8 @@ public void TooShortErrorAtEnd(Utf8ValidationDelegate utf8ValidationDelegate)
665672

666673
byte* dotnetResult = DotnetRuntime.Utf8Utility.GetPointerToFirstInvalidByte(pInput, i + offset, out SimdUnicodeUtf16Adjustment, out SimdUnicodeScalarCountAdjustment);
667674
Assert.True(dotnetResult == pInput + i + offset);
675+
676+
ValidateCount(utf8,utf8ValidationDelegate);
668677
}
669678

670679
}
@@ -737,6 +746,7 @@ public void Invalid0xf50xff(Utf8ValidationDelegate utf8ValidationDelegate)
737746

738747
Assert.False(ValidateUtf8(utf8,utf8ValidationDelegate)); // Expect the validation to fail due to the invalid byte
739748
Assert.True(InvalidateUtf8(utf8,position,utf8ValidationDelegate));
749+
ValidateCount(utf8,utf8ValidationDelegate);
740750
}
741751
}
742752
}
@@ -819,6 +829,7 @@ public void TooLargeError(Utf8ValidationDelegate utf8ValidationDelegate)
819829

820830
Assert.False(ValidateUtf8(utf8,utf8ValidationDelegate));
821831
Assert.True(InvalidateUtf8(utf8, i,utf8ValidationDelegate));
832+
ValidateCount(utf8,utf8ValidationDelegate);
822833
utf8[i] = old;
823834
}
824835
}
@@ -888,6 +899,7 @@ public void TooLargeErrorAtEnd(Utf8ValidationDelegate utf8ValidationDelegate)
888899

889900
Assert.False(ValidateUtf8(filler,utf8ValidationDelegate));
890901
Assert.True(InvalidateUtf8(filler, outputLength -1,utf8ValidationDelegate));
902+
ValidateCount(filler,utf8ValidationDelegate);
891903
}
892904

893905

@@ -963,6 +975,7 @@ public void SurrogateErrorTest(Utf8ValidationDelegate utf8ValidationDelegate)
963975

964976
Assert.False(ValidateUtf8(utf8,utf8ValidationDelegate));
965977
Assert.True(InvalidateUtf8(utf8, i,utf8ValidationDelegate));
978+
ValidateCount(utf8,utf8ValidationDelegate);
966979
}
967980

968981
utf8[i] = old;
@@ -1044,6 +1057,7 @@ public void BruteForceTest(Utf8ValidationDelegate utf8ValidationDelegate)
10441057
// Validate the modified sequence with both methods
10451058
bool isValidPrimary = ValidateUtf8(modifiedUtf8,utf8ValidationDelegate);
10461059
bool isValidFuschia = ValidateUtf8Fuschia(modifiedUtf8);
1060+
ValidateCount(modifiedUtf8,utf8ValidationDelegate);
10471061

10481062
// Ensure both methods agree on the validation result
10491063
Assert.Equal(isValidPrimary, isValidFuschia);

0 commit comments

Comments
 (0)