Skip to content

Commit 4919672

Browse files
committed
slight cleanup + progress
1 parent 051e55b commit 4919672

File tree

1 file changed

+16
-5
lines changed

1 file changed

+16
-5
lines changed

src/UTF8.cs

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,12 @@ namespace SimdUnicode
99
public static class UTF8
1010
{
1111

12-
public unsafe static byte* RewindAndValidateWithErrors(int priorBytes, byte* buf, int len)
12+
public unsafe static byte* RewindAndValidateWithErrors(int priorBytes, byte* buf, int len,ref int utf16CodeUnitCountAdjustment, ref int scalarCountAdjustment)
1313
{
14+
15+
int TempUtf16CodeUnitCountAdjustment = 0;
16+
int TempScalarCountAdjustment = 0;
17+
1418
int howFarBack = priorBytes;
1519
int extraLen = 0;
1620
bool foundLeadingBytes = false;
@@ -27,13 +31,20 @@ public static class UTF8
2731
}
2832
if (!foundLeadingBytes)
2933
{
34+
utf16CodeUnitCountAdjustment += TempUtf16CodeUnitCountAdjustment;
35+
scalarCountAdjustment += TempScalarCountAdjustment;
3036
return buf - howFarBack;
3137
}
3238

39+
// TODO : fix Count handling here
40+
3341

3442
// Now buf points to the start of a UTF-8 sequence or the start of the buffer.
3543
// Validate from this new start point with the adjusted length.
36-
byte* invalidByte = GetPointerToFirstInvalidByteScalar(buf, len + extraLen,out int utf16CodeUnitCountAdjustment, out int scalarCountAdjustment);
44+
byte* invalidByte = GetPointerToFirstInvalidByteScalar(buf, len + extraLen,out TempUtf16CodeUnitCountAdjustment, out TempScalarCountAdjustment);
45+
46+
utf16CodeUnitCountAdjustment += TempUtf16CodeUnitCountAdjustment;
47+
scalarCountAdjustment += TempScalarCountAdjustment;
3748

3849
return invalidByte;
3950
}
@@ -516,7 +527,7 @@ public unsafe static void AdjustForSkippedBytes(byte* pInputBuffer,// int skippe
516527
{
517528

518529
// TODO/think about : this path is not explicitly tested
519-
Console.WriteLine("----Checkpoint 1:All ASCII need rewind");
530+
// Console.WriteLine("----Checkpoint 1:All ASCII need rewind");
520531
utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment;
521532
scalarCountAdjustment = TempScalarCountAdjustment;
522533

@@ -636,7 +647,7 @@ public unsafe static void AdjustForSkippedBytes(byte* pInputBuffer,// int skippe
636647
if (!Avx2.TestZ(prevIncomplete, prevIncomplete))
637648
{
638649

639-
Console.WriteLine("----Checkpoint 2:SIMD rewind");
650+
// Console.WriteLine("----Checkpoint 2:SIMD rewind");
640651
// We have an unterminated sequence.
641652
processedLength -= 3;
642653
for(int k = 0; k < 3; k++)
@@ -669,7 +680,7 @@ public unsafe static void AdjustForSkippedBytes(byte* pInputBuffer,// int skippe
669680
if (processedLength < inputLength)
670681
{
671682

672-
Console.WriteLine("----Process remaining Scalar");
683+
// Console.WriteLine("----Process remaining Scalar");
673684
int overlapCount = 0;
674685

675686
// // We need to possibly backtrack to the start of the last code point

0 commit comments

Comments
 (0)