@@ -10,7 +10,7 @@ namespace SimdUnicode
10
10
public static class UTF8
11
11
{
12
12
13
- //debug helper function for debugging: it prints a green byte every 32 bytes and a red byte at a given index
13
+ // Debug helper: prints a green byte every 32 bytes and a red byte at a given index.
14
14
static void PrintHexAndBinary ( byte [ ] bytes , int highlightIndex = - 1 )
15
15
{
16
16
int chunkSize = 16 ; // 128 bits = 16 bytes
@@ -78,20 +78,20 @@ static void PrintHexAndBinary(byte[] bytes, int highlightIndex = -1)
78
78
79
79
public unsafe static byte * RewindAndValidateWithErrors ( int howFarBack , byte * buf , int len , ref int utf16CodeUnitCountAdjustment , ref int scalarCountAdjustment )
80
80
{
81
- // Console.WriteLine("CALLING REWIND");
81
+ // // Console.WriteLine("CALLING REWIND");//debug
82
82
int extraLen = 0 ;
83
83
bool foundLeadingBytes = false ;
84
84
85
85
for ( int i = 0 ; i <= howFarBack ; i ++ )
86
86
{
87
87
byte candidateByte = buf [ 0 - i ] ;
88
88
foundLeadingBytes = ( candidateByte & 0b11000000 ) != 0b10000000 ;
89
- Console . WriteLine ( $ "Rewinding byte to offset { - i } : { candidateByte : X2} ") ;
90
- Console . WriteLine ( foundLeadingBytes ) ;
89
+ // Console.WriteLine($"Rewinding byte to offset {-i}: {candidateByte:X2}");//debug
90
+ // Console.WriteLine(foundLeadingBytes);//debug
91
91
92
92
if ( foundLeadingBytes )
93
93
{
94
- Console . WriteLine ( "Found leading byte" ) ;
94
+ // Console.WriteLine("Found leading byte");//debug
95
95
buf -= i ;
96
96
break ;
97
97
}
@@ -257,8 +257,8 @@ public unsafe static (int totalbyteadjustment,int backedupByHowMuch,int ascii,in
257
257
{
258
258
if ( ( pInputBuffer [ - i ] & 0b11000000 ) != 0b10000000 )
259
259
{
260
- string binaryString = Convert . ToString ( pInputBuffer [ - i ] , 2 ) . PadLeft ( 8 , '0' ) ; //debug
261
- Console . WriteLine ( $ "Stopping at byte { binaryString } ") ; //debug
260
+ // string binaryString = Convert.ToString(pInputBuffer[-i], 2).PadLeft(8, '0');//debug
261
+ // Console.WriteLine($"Stopping at byte {binaryString}"); //debug
262
262
break ;
263
263
}
264
264
contbyteadjust -= 1 ;
@@ -278,14 +278,14 @@ public unsafe static (int totalbyteadjustment,int backedupByHowMuch,int ascii,in
278
278
279
279
/// <summary>
/// Derives the UTF-16 code unit and scalar count adjustments from four byte tallies.
/// </summary>
/// <remarks>
/// The formulas for <c>n2</c> and <c>n3</c> are the unique solution of the linear system
/// <code>
///   totalbyte = asciibytes + 2*n2 + 3*n3 + 4*n4
///   contbytes =              n2 + 2*n3 + 3*n4
/// </code>
/// i.e. they are consistent with n2/n3/n4 being the number of 2-, 3- and 4-byte sequences
/// (presumably in an already validated UTF-8 span; confirm against the callers).
/// No range checking is performed: inconsistent inputs simply yield whatever the
/// arithmetic produces, including negative n2/n3.
/// </remarks>
/// <param name="asciibytes">Count used as the 1-byte term of the system above.</param>
/// <param name="n4">Count used as the 4-byte-sequence term of the system above.</param>
/// <param name="contbytes">Count used as the continuation-byte total of the system above.</param>
/// <param name="totalbyte">Count used as the overall byte total of the system above.</param>
/// <returns>
/// <c>utfadjust</c> = -(2*n4 + 2*n3 + n2) and <c>scalaradjust</c> = -n4; both are zero or
/// negative whenever n2, n3 and n4 are non-negative.
/// </returns>
public static (int utfadjust, int scalaradjust) CalculateN2N3FinalSIMDAdjustments(int asciibytes, int n4, int contbytes, int totalbyte)
{
    // Solve the two-equation system from the remarks for the two unknowns n3 and n2.
    int n3 = asciibytes - 2 * n4 + 2 * contbytes - totalbyte;
    int n2 = -2 * asciibytes + n4 - 3 * contbytes + 2 * totalbyte;

    // Weights: each 4-byte and 3-byte sequence contributes -2, each 2-byte sequence -1.
    int utfadjust = -2 * n4 - 2 * n3 - n2;

    // Only the 4-byte term contributes to the scalar adjustment.
    int scalaradjust = -n4;

    return (utfadjust, scalaradjust);
}
@@ -494,9 +494,9 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
494
494
495
495
public unsafe static byte * GetPointerToFirstInvalidByteAvx2 ( byte * pInputBuffer , int inputLength , out int utf16CodeUnitCountAdjustment , out int scalarCountAdjustment )
496
496
{
497
- Console . ForegroundColor = ConsoleColor . Blue ; //debug
498
- Console . WriteLine ( "-------------------------------------" ) ; //debug
499
- Console . ResetColor ( ) ; //debug
497
+ // Console.ForegroundColor = ConsoleColor.Blue; //debug
498
+ // Console.WriteLine("-------------------------------------");//debug
499
+ // Console.ResetColor();//debug
500
500
501
501
int processedLength = 0 ;
502
502
int TempUtf16CodeUnitCountAdjustment = 0 ;
@@ -690,7 +690,7 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
690
690
Vector256 < byte > error = Avx2 . Xor ( must23As80 , sc ) ;
691
691
// if (!Avx2.TestZ(error, error))
692
692
// {
693
- // Console.WriteLine($"--Error! @ {processedLength} bytes");//debug
693
+ // Console.WriteLine($"--Error! @ {processedLength} bytes");//debug
694
694
695
695
// int off = processedLength >= 32 ? processedLength - 32 : processedLength;
696
696
// byte* invalidBytePointer = SimdUnicode.UTF8.RewindAndValidateWithErrors(off, pInputBuffer + processedLength, inputLength - processedLength, ref TailUtf16CodeUnitCountAdjustment,ref TailScalarCodeUnitCountAdjustment);
@@ -733,16 +733,19 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
733
733
734
734
if ( ! Avx2 . TestZ ( error , error ) )
735
735
{
736
- Console . WriteLine ( $ "--Error! @ { processedLength } bytes") ; //debug
736
+ // Console.WriteLine($"--Error! @ {processedLength} bytes");//debug
737
737
738
- int off = processedLength >= 32 ? processedLength - 32 : processedLength ;
738
+ int off = processedLength > 32 ? processedLength - 32 : processedLength ; // this does not backup ff processedlength = 32
739
+ // int off = processedLength >= 32 ? processedLength - 32 : processedLength; original/main algorithm working
740
+
741
+ // Console.WriteLine($"Offset backup by: {off}");//debug
739
742
byte * invalidBytePointer = SimdUnicode . UTF8 . RewindAndValidateWithErrors ( off , pInputBuffer + processedLength , inputLength - processedLength , ref TailUtf16CodeUnitCountAdjustment , ref TailScalarCodeUnitCountAdjustment ) ;
740
743
bool TooLongErroronEdge = false ;
741
744
742
745
utf16CodeUnitCountAdjustment = TailUtf16CodeUnitCountAdjustment ;
743
746
scalarCountAdjustment = TailScalarCodeUnitCountAdjustment ;
744
747
745
- Console . WriteLine ( $ "RewindScalarValidation's function utf16adjust:{ TailUtf16CodeUnitCountAdjustment } , scalaradjust:{ TailScalarCodeUnitCountAdjustment } ") ;
748
+ // Console.WriteLine($"RewindScalarValidation's function utf16adjust:{TailUtf16CodeUnitCountAdjustment}, scalaradjust:{TailScalarCodeUnitCountAdjustment}");//debug
746
749
747
750
// We need to take care of eg
748
751
// 11011110 10101101 11110000 10101101 10101111 10011111 11010111 10101000 11001101 10111001 11010100 10000111 11101111 10010000 10000000 11110011
@@ -772,7 +775,7 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
772
775
// if (isContinuationByte && isAtBoundary && isOneByteAfterProcessedLength)// this alone creates false positives
773
776
if ( isContinuationByte && isOneByteAfterProcessedLength )
774
777
{
775
- Console . WriteLine ( "Triggering TooLongErrorOnEdge adjustment" ) ;
778
+ // Console.WriteLine("Triggering TooLongErrorOnEdge adjustment");//debug
776
779
TooLongErroronEdge = true ;
777
780
}
778
781
@@ -796,7 +799,7 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
796
799
processedLength -= i ;
797
800
n4 += tempn4 ; // this is + because the adjustment function returns something negative already
798
801
contbytes += tempcont ;
799
- Console . WriteLine ( $ "Unterminated! @ { processedLength } Backing up by { i } ") ; //debug
802
+ // Console.WriteLine($"Unterminated! @ {processedLength} Backing up by {i}"); //debug
800
803
}
801
804
802
805
0 commit comments