Skip to content

Commit 8e84627

Browse files
committed
cleanup
1 parent b9ff7c3 commit 8e84627

File tree

1 file changed

+7
-77
lines changed

1 file changed

+7
-77
lines changed

src/UTF8.cs

Lines changed: 7 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,6 @@ public static class UTF8
1414

1515
public unsafe static byte* RewindAndValidateWithErrors(int howFarBack, byte* buf, int len,ref int utf16CodeUnitCountAdjustment, ref int scalarCountAdjustment)
1616
{
17-
Console.WriteLine("-Rewind Validate with Errors");
18-
Console.WriteLine("current Byte:" + Convert.ToString(buf[0], 2).PadLeft(8, '0'));
1917

2018
int TempUtf16CodeUnitCountAdjustment = 0;
2119
int TempScalarCountAdjustment = 0;
@@ -31,11 +29,6 @@ public static class UTF8
3129
if (foundLeadingBytes)
3230
{
3331
buf -= i;
34-
// extraLen = i; // a measure of how far we've backed up, only useful for debugging
35-
// Console.WriteLine(howFarBack);
36-
Console.WriteLine("Found leading byte at:" + i + ",Byte:" + Convert.ToString(candidateByte, 2).PadLeft(8, '0'));
37-
38-
// Console.WriteLine("Backed up " + extraLen + 1 + " bytes");
3932
break;
4033
}
4134
}
@@ -218,32 +211,16 @@ public unsafe static (int totalbyteadjustment,int backedupByHowMuch,int ascii,in
218211

219212
public static (int utfadjust, int scalaradjust) CalculateN2N3FinalSIMDAdjustments(int asciibytes, int n4, int contbytes, int totalbyte)
220213
{
221-
222-
Console.WriteLine("CalculateN2N3FinalSIMDAdjustments's input debug. This is ascii count:" + asciibytes + " n4: " + n4 + " contbytes:" + contbytes + " totalbytes:" + totalbyte);
223-
// Calculate n3 based on the provided formula
214+
// Console.WriteLine("CalculateN2N3FinalSIMDAdjustments's input debug. This is ascii count:" + asciibytes + " n4: " + n4 + " contbytes:" + contbytes + " totalbytes:" + totalbyte);
224215
int n3 = asciibytes - 2 * n4 + 2 * contbytes - totalbyte;
225-
226-
// Calculate n2 based on the provided formula
227216
int n2 = -2 * asciibytes + n4 - 3 * contbytes + 2 * totalbyte;
228-
229-
// Calculate utfadjust by adding them all up
230217
int utfadjust = -2 * n4 - 2 * n3 - n2;
231-
232-
// Calculate scalaradjust based on n4
233218
int scalaradjust = -n4;
234219

235-
236-
237-
238-
// Return the calculated utfadjust and scalaradjust
239220
return (utfadjust, scalaradjust);
240221
}
241222

242-
243-
244-
245-
246-
public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(int start_point, int processedLength, byte* pInputBuffer, int asciibytes, int n4, int n2, int contbytes)
223+
public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(int start_point, int processedLength, byte* pInputBuffer, int asciibytes, int n4, int contbytes)
247224
{
248225
// Calculate the total bytes from start_point to processedLength
249226
int totalbyte = processedLength - start_point;
@@ -257,7 +234,6 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
257234

258235
var (utfadjust,scalaradjust) = CalculateN2N3FinalSIMDAdjustments( asciibytes + adjustascii, n4 + adjustn4, contbytes + adjustcont, totalbyte + adjusttotalbyte);
259236

260-
// Return the calculated n2 and n3
261237
return (utfadjust, scalaradjust);
262238
}
263239

@@ -339,7 +315,6 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
339315
Vector128<byte> fourthByte = Vector128.Create((byte)(0b11110000u - 0x80));
340316
Vector128<byte> v0f = Vector128.Create((byte)0x0F);
341317
Vector128<byte> v80 = Vector128.Create((byte)0x80);
342-
343318
for (; processedLength + 16 <= inputLength; processedLength += 16)
344319
{
345320

@@ -417,8 +392,6 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
417392

418393
public unsafe static byte* GetPointerToFirstInvalidByteAvx2(byte* pInputBuffer, int inputLength,out int utf16CodeUnitCountAdjustment, out int scalarCountAdjustment)
419394
{
420-
Console.WriteLine("--------------------------Calling function----------------------------------");
421-
// Console.WriteLine("Length: " + inputLength);
422395
int processedLength = 0;
423396
int TempUtf16CodeUnitCountAdjustment= 0 ;
424397
int TempScalarCountAdjustment = 0;
@@ -570,11 +543,7 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
570543
// The block goes from processedLength to processedLength/16*16.
571544
int asciibytes = 0; // number of ascii bytes in the block (could also be called n1)
572545
int contbytes = 0; // number of continuation bytes in the block
573-
int n4 = 0; // number of 4-byte sequences that start in this block
574-
// int totalbyte = 0, n3 = 0, n2 = 0;
575-
576-
577-
546+
int n4 = 0; // number of 4-byte sequences that start in this block
578547

579548
for (; processedLength + 32 <= inputLength; processedLength += 32)
580549
{
@@ -586,12 +555,10 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
586555
{
587556
// We have an ASCII block, no need to process it, but
588557
// we need to check if the previous block was incomplete.
558+
//
589559
if (!Avx2.TestZ(prevIncomplete, prevIncomplete))
590560
{
591-
// TODO? : this path is not explicitly tested
592-
Console.WriteLine("---------All ascii need rewind");
593-
594-
561+
// TODO? : this path is not explicitly tested, write tests
595562
int totalbyteasciierror = processedLength - start_point;
596563
var (utfadjustasciierror, scalaradjustasciierror) = CalculateN2N3FinalSIMDAdjustments(asciibytes, n4, contbytes, totalbyteasciierror);
597564

@@ -605,7 +572,6 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
605572
}
606573
else // Contains non-ASCII characters, we need to do non-trivial processing
607574
{
608-
Console.WriteLine("--Found non-ascii:triggering SIMD routine at " + processedLength + "bytes"); //debug
609575
// Use SubtractSaturate to effectively compare if bytes in block are greater than markers.
610576
Vector256<byte> shuffled = Avx2.Permute2x128(prevInputBlock, currentBlock, 0x21);
611577
prevInputBlock = currentBlock;
@@ -625,13 +591,8 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
625591
Vector256<byte> error = Avx2.Xor(must23As80, sc);
626592
if (!Avx2.TestZ(error, error))
627593
{
628-
Console.WriteLine("-----Error path!!");
629-
630594
int totalbyteasciierror = processedLength - start_point;
631-
var (utfadjustasciierror, scalaradjustasciierror) = calculateErrorPathadjust(start_point, processedLength, pInputBuffer, asciibytes, n4, contbytes, contbytes);
632-
633-
Console.WriteLine("calculateErrorPathadjust utf16 adjustment:"+ utfadjustasciierror);
634-
Console.WriteLine("calculateErrorPathadjust scalar adjustment:"+ scalaradjustasciierror);
595+
var (utfadjustasciierror, scalaradjustasciierror) = calculateErrorPathadjust(start_point, processedLength, pInputBuffer, asciibytes, n4, contbytes);
635596

636597
utf16CodeUnitCountAdjustment = utfadjustasciierror;
637598
scalarCountAdjustment = scalaradjustasciierror;
@@ -645,52 +606,30 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
645606
utf16CodeUnitCountAdjustment += TailUtf16CodeUnitCountAdjustment;
646607
scalarCountAdjustment += TailScalarCodeUnitCountAdjustment;
647608

648-
// Console.WriteLine("--------"); //debug
649-
Console.WriteLine("TempUTF16 after error rewind:"+ utf16CodeUnitCountAdjustment);
650-
Console.WriteLine("TempScalar '' '' '':"+ scalarCountAdjustment);
651-
652609
return invalidBytePointer;
653610
}
654611

655-
// Console.WriteLine("Doublecount(Temp) after SIMD processing:" + TempUtf16CodeUnitCountAdjustment); debug
656-
// Console.WriteLine("Scalarcount after SIMD processing:" + TempScalarCountAdjustment);
657612
prevIncomplete = Avx2.SubtractSaturate(currentBlock, maxValue);
658613

659614
if (!Avx2.TestZ(prevIncomplete, prevIncomplete))
660615
{
661616
// We have an unterminated sequence.
662-
Console.WriteLine("---Unterminated seq--- at " + processedLength + "bytes");
663-
664-
665617
var (totalbyteadjustment, i,tempascii, tempcont, tempn4) = adjustmentFactor(pInputBuffer + processedLength + 32);
666618

667-
Console.WriteLine("this is n4 adjusted by the adjustmentfactor function :" + tempn4 + " contbyte: " + contbytes);
668-
6
669619
processedLength -= i;
670620
n4 += tempn4;
671621
contbytes +=tempcont;
672622

673-
lastSIMDisIncomplete = true;
674-
675-
// // Console.WriteLine("TempUTF16:"+ TempUtf16CodeUnitCountAdjustment);
676-
// // Console.WriteLine("TempScalar:"+ TempScalarCountAdjustment);
677-
678623
}
679624

680625
// No errors! Updating the variables we keep track of
681626
// We use one instruction (MoveMask) to update contbytes, plus one arithmetic operation.
682627
contbytes += (int)Popcnt.PopCount((uint)Avx2.MoveMask(sc));
683628

684-
685-
686629
// We use two instructions (SubtractSaturate and MoveMask) to update n4, with one arithmetic operation.
687630
n4 += (int)Popcnt.PopCount((uint)Avx2.MoveMask(Avx2.SubtractSaturate(currentBlock, fourthByte)));
688-
Console.WriteLine("No error has been detected! Adding contbytes: " + (int)Popcnt.PopCount((uint)Avx2.MoveMask(sc)) + "Adding n4: " + (int)Popcnt.PopCount((uint)Avx2.MoveMask(Avx2.SubtractSaturate(currentBlock, fourthByte))));
689-
Console.WriteLine(" this is the accumulated contbytes" + contbytes + " and n4:" + n4) ; // debug
690631
}
691-
asciibytes += (int)(32 - Popcnt.PopCount((uint)mask));// TODO(Nick Nuon): simplify this expression
692-
693-
632+
asciibytes += (int)(32 - Popcnt.PopCount((uint)mask));
694633
}
695634

696635
// important: we just update asciibytes if there was no error.
@@ -712,7 +651,6 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
712651

713652

714653
}
715-
// Console.WriteLine("-Done with SIMD part!"); //debug
716654
// We have processed all the blocks using SIMD, we need to process the remaining bytes.
717655
// Process the remaining bytes with the scalar function
718656

@@ -721,11 +659,6 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
721659
// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx <== we might be pointing at the last byte
722660
if (processedLength < inputLength)
723661
{
724-
Console.WriteLine("----Process remaining Scalar @ " + processedLength + "bytes");
725-
// int overlapCount = 0;
726-
// Console.WriteLine("processed length after backtrack:" + processedLength);
727-
// Console.WriteLine("TempUTF16 before tail remaining check:"+ TempUtf16CodeUnitCountAdjustment);
728-
// Console.WriteLine("TempScalar '' '' '':"+ TempScalarCountAdjustment);
729662
byte* invalidBytePointer = SimdUnicode.UTF8.RewindAndValidateWithErrors(32,pInputBuffer + processedLength, inputLength - processedLength,ref TailUtf16CodeUnitCountAdjustment,ref TailScalarCodeUnitCountAdjustment);
730663
if (invalidBytePointer != pInputBuffer + inputLength)
731664
{
@@ -734,8 +667,6 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
734667
// An invalid byte was found by the scalar function
735668
return invalidBytePointer;
736669
}
737-
// Console.WriteLine("TempUTF16 after tail remaining check:"+ TempUtf16CodeUnitCountAdjustment);
738-
// Console.WriteLine("TempScalar '' '' '':"+ TempScalarCountAdjustment);
739670
}
740671
utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment + TailUtf16CodeUnitCountAdjustment;
741672
scalarCountAdjustment = TempScalarCountAdjustment + TailScalarCodeUnitCountAdjustment;
@@ -746,7 +677,6 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
746677
public unsafe static byte* GetPointerToFirstInvalidByteArm64(byte* pInputBuffer, int inputLength)
747678
{
748679
int processedLength = 0;
749-
750680
int TempUtf16CodeUnitCountAdjustment= 0 ;
751681
int TempScalarCountAdjustment = 0;
752682

0 commit comments

Comments
 (0)