Skip to content

Commit d784815

Browse files
committed
save game
1 parent 1cee9d6 commit d784815

File tree

1 file changed

+22
-19
lines changed

1 file changed

+22
-19
lines changed

src/UTF8.cs

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ namespace SimdUnicode
1010
public static class UTF8
1111
{
1212

13-
//debug helper function for debugging: it prints a green byte every 32 bytes and a red byte at a given index
13+
// //debug helper function for debugging: it prints a green byte every 32 bytes and a red byte at a given index
1414
static void PrintHexAndBinary(byte[] bytes, int highlightIndex = -1)
1515
{
1616
int chunkSize = 16; // 128 bits = 16 bytes
@@ -78,20 +78,20 @@ static void PrintHexAndBinary(byte[] bytes, int highlightIndex = -1)
7878

7979
public unsafe static byte* RewindAndValidateWithErrors(int howFarBack, byte* buf, int len,ref int utf16CodeUnitCountAdjustment, ref int scalarCountAdjustment)
8080
{
81-
// Console.WriteLine("CALLING REWIND");
81+
// // Console.WriteLine("CALLING REWIND");//debug
8282
int extraLen = 0;
8383
bool foundLeadingBytes = false;
8484

8585
for (int i = 0; i <= howFarBack; i++)
8686
{
8787
byte candidateByte = buf[0 - i];
8888
foundLeadingBytes = (candidateByte & 0b11000000) != 0b10000000;
89-
Console.WriteLine($"Rewinding byte to offset {-i}: {candidateByte:X2}");
90-
Console.WriteLine(foundLeadingBytes);
89+
// Console.WriteLine($"Rewinding byte to offset {-i}: {candidateByte:X2}");//debug
90+
// Console.WriteLine(foundLeadingBytes);//debug
9191

9292
if (foundLeadingBytes)
9393
{
94-
Console.WriteLine("Found leading byte");
94+
// Console.WriteLine("Found leading byte");//debug
9595
buf -= i;
9696
break;
9797
}
@@ -257,8 +257,8 @@ public unsafe static (int totalbyteadjustment,int backedupByHowMuch,int ascii,in
257257
{
258258
if ((pInputBuffer[-i] & 0b11000000) != 0b10000000)
259259
{
260-
string binaryString = Convert.ToString(pInputBuffer[-i], 2).PadLeft(8, '0');//debug
261-
Console.WriteLine($"Stopping at byte {binaryString}"); //debug
260+
// string binaryString = Convert.ToString(pInputBuffer[-i], 2).PadLeft(8, '0');//debug
261+
// Console.WriteLine($"Stopping at byte {binaryString}"); //debug
262262
break;
263263
}
264264
contbyteadjust -= 1;
@@ -278,14 +278,14 @@ public unsafe static (int totalbyteadjustment,int backedupByHowMuch,int ascii,in
278278

279279
// Derives the two count adjustments (utfadjust, scalaradjust) from four aggregate byte counts.
//
// Parameters (all plain ints; no range or consistency checks are performed here):
//   asciibytes - presumably the number of single-byte (ASCII) bytes counted -- confirm against callers
//   n4         - presumably the number of 4-byte sequences -- confirm against callers
//   contbytes  - presumably the number of continuation bytes -- confirm against callers
//   totalbyte  - presumably the total number of bytes covered by the other counts -- confirm against callers
//
// How n3 and n2 are obtained: the two expressions in the body are exactly the algebraic
// solution of the linear system
//   totalbyte = asciibytes + 2*n2 + 3*n3 + 4*n4
//   contbytes =              n2 + 2*n3 + 3*n4
// which suggests n2 and n3 are the numbers of 2-byte and 3-byte sequences.
//
// Returns:
//   utfadjust    = -(n2 + 2*n3 + 2*n4)
//   scalaradjust = -n4
// Both are zero or negative whenever n2, n3 and n4 are non-negative. If the inputs do not
// satisfy the system above, n2/n3 can come out negative and the results are meaningless.
public static (int utfadjust, int scalaradjust) CalculateN2N3FinalSIMDAdjustments(int asciibytes, int n4, int contbytes, int totalbyte)
280280
{
281-
Console.WriteLine("---------"); //debug
282-
Console.WriteLine("CalculateN2N3FinalSIMDAdjustments's input debug. This is ascii count:" + asciibytes + " n4: " + n4 + " contbytes:" + contbytes + " totalbytes:" + totalbyte);//debug
281+
// Console.WriteLine("---------"); //debug
282+
// Console.WriteLine("CalculateN2N3FinalSIMDAdjustments's input debug. This is ascii count:" + asciibytes + " n4: " + n4 + " contbytes:" + contbytes + " totalbytes:" + totalbyte);//debug
283283
int n3 = asciibytes - 2 * n4 + 2 * contbytes - totalbyte;
284284
int n2 = -2 * asciibytes + n4 - 3 * contbytes + 2 * totalbyte;
285285
int utfadjust = -2 * n4 - 2 * n3 - n2;
286286
int scalaradjust = -n4;
287287

288-
Console.WriteLine("CalculateN2N3FinalSIMDAdjustments's output debug. This is n3 count:" + n3 + " n2: " + n2 + " utfadjust:" + utfadjust + " scalaradjust:" + scalaradjust);//debug
288+
// Console.WriteLine("CalculateN2N3FinalSIMDAdjustments's output debug. This is n3 count:" + n3 + " n2: " + n2 + " utfadjust:" + utfadjust + " scalaradjust:" + scalaradjust);//debug
289289

290290
return (utfadjust, scalaradjust);
291291
}
@@ -494,9 +494,9 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
494494

495495
public unsafe static byte* GetPointerToFirstInvalidByteAvx2(byte* pInputBuffer, int inputLength,out int utf16CodeUnitCountAdjustment, out int scalarCountAdjustment)
496496
{
497-
Console.ForegroundColor = ConsoleColor.Blue; //debug
498-
Console.WriteLine("-------------------------------------");//debug
499-
Console.ResetColor();//debug
497+
// Console.ForegroundColor = ConsoleColor.Blue; //debug
498+
// Console.WriteLine("-------------------------------------");//debug
499+
// Console.ResetColor();//debug
500500

501501
int processedLength = 0;
502502
int TempUtf16CodeUnitCountAdjustment= 0 ;
@@ -690,7 +690,7 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
690690
Vector256<byte> error = Avx2.Xor(must23As80, sc);
691691
// if (!Avx2.TestZ(error, error))
692692
// {
693-
// Console.WriteLine($"--Error! @ {processedLength} bytes");//debug
693+
// Console.WriteLine($"--Error! @ {processedLength} bytes");//debug
694694

695695
// int off = processedLength >= 32 ? processedLength - 32 : processedLength;
696696
// byte* invalidBytePointer = SimdUnicode.UTF8.RewindAndValidateWithErrors(off, pInputBuffer + processedLength, inputLength - processedLength, ref TailUtf16CodeUnitCountAdjustment,ref TailScalarCodeUnitCountAdjustment);
@@ -733,16 +733,19 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
733733

734734
if (!Avx2.TestZ(error, error))
735735
{
736-
Console.WriteLine($"--Error! @ {processedLength} bytes");//debug
736+
// Console.WriteLine($"--Error! @ {processedLength} bytes");//debug
737737

738-
int off = processedLength >= 32 ? processedLength - 32 : processedLength;
738+
int off = processedLength > 32 ? processedLength - 32 : processedLength;// this does not backup ff processedlength = 32
739+
// int off = processedLength >= 32 ? processedLength - 32 : processedLength; original/main algorithm working
740+
741+
// Console.WriteLine($"Offset backup by: {off}");//debug
739742
byte* invalidBytePointer = SimdUnicode.UTF8.RewindAndValidateWithErrors(off, pInputBuffer + processedLength, inputLength - processedLength, ref TailUtf16CodeUnitCountAdjustment,ref TailScalarCodeUnitCountAdjustment);
740743
bool TooLongErroronEdge = false;
741744

742745
utf16CodeUnitCountAdjustment = TailUtf16CodeUnitCountAdjustment;
743746
scalarCountAdjustment = TailScalarCodeUnitCountAdjustment;
744747

745-
Console.WriteLine($"RewindScalarValidation's function utf16adjust:{TailUtf16CodeUnitCountAdjustment}, scalaradjust:{TailScalarCodeUnitCountAdjustment}");
748+
// Console.WriteLine($"RewindScalarValidation's function utf16adjust:{TailUtf16CodeUnitCountAdjustment}, scalaradjust:{TailScalarCodeUnitCountAdjustment}");//debug
746749

747750
// We need to take care of eg
748751
// 11011110 10101101 11110000 10101101 10101111 10011111 11010111 10101000 11001101 10111001 11010100 10000111 11101111 10010000 10000000 11110011
@@ -772,7 +775,7 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
772775
// if (isContinuationByte && isAtBoundary && isOneByteAfterProcessedLength)// this alone creates false positives
773776
if (isContinuationByte && isOneByteAfterProcessedLength)
774777
{
775-
Console.WriteLine("Triggering TooLongErrorOnEdge adjustment");
778+
// Console.WriteLine("Triggering TooLongErrorOnEdge adjustment");//debug
776779
TooLongErroronEdge = true;
777780
}
778781

@@ -796,7 +799,7 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
796799
processedLength -= i;
797800
n4 += tempn4;// this is + because the adjustment function returns something negative already
798801
contbytes +=tempcont;
799-
Console.WriteLine($"Unterminated! @ {processedLength} Backing up by {i}"); //debug
802+
// Console.WriteLine($"Unterminated! @ {processedLength} Backing up by {i}"); //debug
800803
}
801804

802805

0 commit comments

Comments
 (0)