@@ -18,23 +18,28 @@ public static class UTF8
18
18
int extraLen = 0 ;
19
19
bool foundLeadingBytes = false ;
20
20
21
- for ( int i = 0 ; i < howFarBack ; i ++ )
21
+ for ( int i = 0 ; i <= howFarBack ; i ++ )
22
22
{
23
23
byte candidateByte = buf [ 0 - i ] ;
24
24
foundLeadingBytes = ( candidateByte & 0b11000000 ) != 0b10000000 ;
25
25
if ( foundLeadingBytes )
26
26
{
27
+ if ( i == 0 ) { break ; }
28
+ Console . WriteLine ( "Found leading byte at:" + i + ",Byte:" + candidateByte . ToString ( "X2" ) ) ;
27
29
// adjustment to avoid double counting
28
30
if ( ( candidateByte & 0b11100000 ) == 0b11000000 ) // Start of a 2-byte sequence
29
31
{
32
+ Console . WriteLine ( "Found 2 byte" ) ;
30
33
TempUtf16CodeUnitCountAdjustment += 1 ;
31
34
}
32
35
if ( ( candidateByte & 0b11110000 ) == 0b11100000 ) // Start of a 3-byte sequence
33
36
{
37
+ Console . WriteLine ( "Found 3 byte" ) ;
34
38
TempUtf16CodeUnitCountAdjustment += 2 ;
35
39
}
36
40
if ( ( candidateByte & 0b11111000 ) == 0b11110000 ) // Start of a 4-byte sequence
37
41
{
42
+ Console . WriteLine ( "Found 4 byte" ) ;
38
43
TempUtf16CodeUnitCountAdjustment += 2 ;
39
44
TempScalarCountAdjustment += 1 ;
40
45
}
@@ -74,8 +79,11 @@ public static class UTF8
74
79
utf16CodeUnitCountAdjustment += TailUtf16CodeUnitCountAdjustment ;
75
80
scalarCountAdjustment += TailScalarCountAdjustment ;
76
81
77
- Console . WriteLine ( "utf16count after rewint:" + utf16CodeUnitCountAdjustment ) ;
78
- Console . WriteLine ( "scalarcount after rewint:" + scalarCountAdjustment ) ;
82
+ Console . WriteLine ( "utf16count after rewint(Temp):" + TempUtf16CodeUnitCountAdjustment ) ;
83
+ Console . WriteLine ( "scalarcount after rewint:" + TempScalarCountAdjustment ) ;
84
+
85
+ Console . WriteLine ( "utf16count after rewint(Scalar):" + TailUtf16CodeUnitCountAdjustment ) ;
86
+ Console . WriteLine ( "scalarcount after rewint:" + TailScalarCountAdjustment ) ;
79
87
80
88
return invalidBytePointer ;
81
89
}
@@ -620,11 +628,7 @@ public unsafe static void AdjustForSkippedBytes(byte* pInputBuffer,// int skippe
620
628
uint threeByteCount = threeBytePlusCount - fourByteCount ; // Isolate 3-byte starts by subtracting 4-byte starts.
621
629
uint twoByteCount = twoBytePlusCount - threeBytePlusCount ; // Isolate 2-byte starts by subtracting 3-byte and 4-byte starts.
622
630
623
- // Adjustments
624
- TempUtf16CodeUnitCountAdjustment -= ( int ) fourByteCount * 2 ;
625
- TempUtf16CodeUnitCountAdjustment -= ( int ) twoByteCount ;
626
- TempUtf16CodeUnitCountAdjustment -= ( int ) threeByteCount * 2 ;
627
- TempScalarCountAdjustment -= ( int ) fourByteCount ;
631
+
628
632
629
633
Vector256 < byte > shuffled = Avx2 . Permute2x128 ( prevInputBlock , currentBlock , 0x21 ) ;
630
634
prevInputBlock = currentBlock ;
@@ -649,17 +653,19 @@ public unsafe static void AdjustForSkippedBytes(byte* pInputBuffer,// int skippe
649
653
TailUtf16CodeUnitCountAdjustment = 0 ;
650
654
651
655
652
- int off = processedLength >= 32 ? processedLength - 32 : 0 ; //processedLength;
653
- // Console.WriteLine(off);
656
+ int off = processedLength >= 32 ? processedLength : 0 ; //processedLength;
657
+
658
+ Console . WriteLine ( "This is off :" + off ) ;
654
659
// return SimdUnicode.UTF8.RewindAndValidateWithErrors(off, pInputBuffer + off, inputLength - off);
655
660
// byte* invalidBytePointer = SimdUnicode.UTF8.RewindAndValidateWithErrors(off, pInputBuffer + off, inputLength - off, ref utf16CodeUnitCountAdjustment,ref scalarCountAdjustment);
656
661
byte * invalidBytePointer = SimdUnicode . UTF8 . RewindAndValidateWithErrors ( off , pInputBuffer + off , inputLength , ref TailUtf16CodeUnitCountAdjustment , ref TailScalarCodeUnitCountAdjustment ) ;
657
662
663
+ // byte* invalidBytePointer = SimdUnicode.UTF8.GetPointerToFirstInvalidByteScalar(pInputBuffer,processedLength,out TailUtf16CodeUnitCountAdjustment,out TailScalarCodeUnitCountAdjustment);
658
664
// Adjustments not to double count
659
- TempUtf16CodeUnitCountAdjustment += ( int ) fourByteCount * 2 ;
660
- TempUtf16CodeUnitCountAdjustment += ( int ) twoByteCount ;
661
- TempUtf16CodeUnitCountAdjustment += ( int ) threeByteCount * 2 ;
662
- TempScalarCountAdjustment += ( int ) fourByteCount ;
665
+ // TempUtf16CodeUnitCountAdjustment += (int)fourByteCount * 2;
666
+ // TempUtf16CodeUnitCountAdjustment += (int)twoByteCount;
667
+ // TempUtf16CodeUnitCountAdjustment += (int)threeByteCount *2;
668
+ // TempScalarCountAdjustment += (int)fourByteCount;
663
669
664
670
utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment + TailUtf16CodeUnitCountAdjustment ;
665
671
scalarCountAdjustment = TempScalarCountAdjustment + TailScalarCodeUnitCountAdjustment ;
@@ -669,6 +675,12 @@ public unsafe static void AdjustForSkippedBytes(byte* pInputBuffer,// int skippe
669
675
return invalidBytePointer ;
670
676
671
677
}
678
+ // Adjustments
679
+ TempUtf16CodeUnitCountAdjustment -= ( int ) fourByteCount * 2 ;
680
+ TempUtf16CodeUnitCountAdjustment -= ( int ) twoByteCount ;
681
+ TempUtf16CodeUnitCountAdjustment -= ( int ) threeByteCount * 2 ;
682
+ TempScalarCountAdjustment -= ( int ) fourByteCount ;
683
+
672
684
prevIncomplete = Avx2 . SubtractSaturate ( currentBlock , maxValue ) ;
673
685
}
674
686
}
0 commit comments