@@ -14,8 +14,6 @@ public static class UTF8
14
14
15
15
public unsafe static byte * RewindAndValidateWithErrors ( int howFarBack , byte * buf , int len , ref int utf16CodeUnitCountAdjustment , ref int scalarCountAdjustment )
16
16
{
17
- Console . WriteLine ( "-Rewind Validate with Errors" ) ;
18
- Console . WriteLine ( "current Byte:" + Convert . ToString ( buf [ 0 ] , 2 ) . PadLeft ( 8 , '0' ) ) ;
19
17
20
18
int TempUtf16CodeUnitCountAdjustment = 0 ;
21
19
int TempScalarCountAdjustment = 0 ;
@@ -31,11 +29,6 @@ public static class UTF8
31
29
if ( foundLeadingBytes )
32
30
{
33
31
buf -= i ;
34
- // extraLen = i; // a measure of how far we've backed up, only useful for debugging
35
- // Console.WriteLine(howFarBack);
36
- Console . WriteLine ( "Found leading byte at:" + i + ",Byte:" + Convert . ToString ( candidateByte , 2 ) . PadLeft ( 8 , '0' ) ) ;
37
-
38
- // Console.WriteLine("Backed up " + extraLen + 1 + " bytes");
39
32
break ;
40
33
}
41
34
}
@@ -218,32 +211,16 @@ public unsafe static (int totalbyteadjustment,int backedupByHowMuch,int ascii,in
218
211
219
212
/// <summary>
/// Computes the UTF-16 code-unit adjustment and the scalar adjustment for a block of bytes
/// from four aggregate counters, without looking at the bytes themselves.
/// </summary>
/// <param name="asciibytes">Number of ASCII bytes in the block (callers also refer to this as n1).</param>
/// <param name="n4">Number of 4-byte sequences that start in the block.</param>
/// <param name="contbytes">Number of continuation bytes in the block.</param>
/// <param name="totalbyte">Total number of bytes in the block.</param>
/// <returns>
/// The pair (utfadjust, scalaradjust), where utfadjust = -2*n4 - 2*n3 - n2 and scalaradjust = -n4.
/// </returns>
/// <remarks>
/// NOTE(review): the formulas appear to assume the counters describe well-formed UTF-8; there is
/// no validation here, so inconsistent counters simply produce an arbitrary result. Confirm
/// against the callers.
/// </remarks>
public static ( int utfadjust , int scalaradjust ) CalculateN2N3FinalSIMDAdjustments ( int asciibytes , int n4 , int contbytes , int totalbyte )
220
213
{
221
-
222
- Console . WriteLine ( "CalculateN2N3FinalSIMDAdjustments's input debug. This is ascii count:" + asciibytes + " n4: " + n4 + " contbytes:" + contbytes + " totalbytes:" + totalbyte ) ;
223
- // Calculate n3 based on the provided formula
214
+ // Console.WriteLine("CalculateN2N3FinalSIMDAdjustments's input debug. This is ascii count:" + asciibytes + " n4: " + n4 + " contbytes:" + contbytes + " totalbytes:" + totalbyte);
224
215
// n3 and n2 are the solution of the two linear equations
//   totalbyte = asciibytes + 2*n2 + 3*n3 + 4*n4
//   contbytes =              n2 + 2*n3 + 3*n4
// i.e. (presumably) the number of 3-byte and 2-byte sequences in the block.
int n3 = asciibytes - 2 * n4 + 2 * contbytes - totalbyte ;
225
-
226
- // Calculate n2 based on the provided formula
227
216
// Second unknown of the same system; see the equations above n3.
int n2 = - 2 * asciibytes + n4 - 3 * contbytes + 2 * totalbyte ;
228
-
229
- // Calculate utfadjust by adding them all up
230
217
// Each 2-byte sequence contributes -1, each 3-byte and each 4-byte sequence contributes -2.
// Presumably this is (UTF-16 code units - bytes) per sequence; confirm with the caller's contract.
int utfadjust = - 2 * n4 - 2 * n3 - n2 ;
231
-
232
- // Calculate scalaradjust based on n4
233
218
// Only 4-byte sequences affect the scalar adjustment: one unit is subtracted per sequence.
int scalaradjust = - n4 ;
234
219
235
-
236
-
237
-
238
- // Return the calculated utfadjust and scalaradjust
239
220
return ( utfadjust , scalaradjust ) ;
240
221
}
241
222
242
-
243
-
244
-
245
-
246
- public unsafe static ( int utfadjust , int scalaradjust ) calculateErrorPathadjust ( int start_point , int processedLength , byte * pInputBuffer , int asciibytes , int n4 , int n2 , int contbytes )
223
+ public unsafe static ( int utfadjust , int scalaradjust ) calculateErrorPathadjust ( int start_point , int processedLength , byte * pInputBuffer , int asciibytes , int n4 , int contbytes )
247
224
{
248
225
// Calculate the total bytes from start_point to processedLength
249
226
int totalbyte = processedLength - start_point ;
@@ -257,7 +234,6 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
257
234
258
235
var ( utfadjust , scalaradjust ) = CalculateN2N3FinalSIMDAdjustments ( asciibytes + adjustascii , n4 + adjustn4 , contbytes + adjustcont , totalbyte + adjusttotalbyte ) ;
259
236
260
- // Return the calculated n2 and n3
261
237
return ( utfadjust , scalaradjust ) ;
262
238
}
263
239
@@ -339,7 +315,6 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
339
315
Vector128 < byte > fourthByte = Vector128 . Create ( ( byte ) ( 0b11110000u - 0x80 ) ) ;
340
316
Vector128 < byte > v0f = Vector128 . Create ( ( byte ) 0x0F ) ;
341
317
Vector128 < byte > v80 = Vector128 . Create ( ( byte ) 0x80 ) ;
342
-
343
318
for ( ; processedLength + 16 <= inputLength ; processedLength += 16 )
344
319
{
345
320
@@ -417,8 +392,6 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
417
392
418
393
public unsafe static byte * GetPointerToFirstInvalidByteAvx2 ( byte * pInputBuffer , int inputLength , out int utf16CodeUnitCountAdjustment , out int scalarCountAdjustment )
419
394
{
420
- Console . WriteLine ( "--------------------------Calling function----------------------------------" ) ;
421
- // Console.WriteLine("Length: " + inputLength);
422
395
int processedLength = 0 ;
423
396
int TempUtf16CodeUnitCountAdjustment = 0 ;
424
397
int TempScalarCountAdjustment = 0 ;
@@ -570,11 +543,7 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
570
543
// The block goes from processedLength to processedLength/16*16.
571
544
int asciibytes = 0 ; // number of ascii bytes in the block (could also be called n1)
572
545
int contbytes = 0 ; // number of continuation bytes in the block
573
- int n4 = 0 ; // number of 4-byte sequences that start in this block
574
- // int totalbyte = 0, n3 = 0, n2 = 0;
575
-
576
-
577
-
546
+ int n4 = 0 ; // number of 4-byte sequences that start in this block
578
547
579
548
for ( ; processedLength + 32 <= inputLength ; processedLength += 32 )
580
549
{
@@ -586,12 +555,10 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
586
555
{
587
556
// We have an ASCII block, no need to process it, but
588
557
// we need to check if the previous block was incomplete.
558
+ //
589
559
if ( ! Avx2 . TestZ ( prevIncomplete , prevIncomplete ) )
590
560
{
591
- // TODO? : this path is not explicitly tested
592
- Console . WriteLine ( "---------All ascii need rewind" ) ;
593
-
594
-
561
+ // TODO? : this path is not explicitly tested, write tests
595
562
int totalbyteasciierror = processedLength - start_point ;
596
563
var ( utfadjustasciierror , scalaradjustasciierror ) = CalculateN2N3FinalSIMDAdjustments ( asciibytes , n4 , contbytes , totalbyteasciierror ) ;
597
564
@@ -605,7 +572,6 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
605
572
}
606
573
else // Contains non-ASCII characters, we need to do non-trivial processing
607
574
{
608
- Console . WriteLine ( "--Found non-ascii:triggering SIMD routine at " + processedLength + "bytes" ) ; //debug
609
575
// Use SubtractSaturate to effectively compare if bytes in block are greater than markers.
610
576
Vector256 < byte > shuffled = Avx2 . Permute2x128 ( prevInputBlock , currentBlock , 0x21 ) ;
611
577
prevInputBlock = currentBlock ;
@@ -625,13 +591,8 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
625
591
Vector256 < byte > error = Avx2 . Xor ( must23As80 , sc ) ;
626
592
if ( ! Avx2 . TestZ ( error , error ) )
627
593
{
628
- Console . WriteLine ( "-----Error path!!" ) ;
629
-
630
594
int totalbyteasciierror = processedLength - start_point ;
631
- var ( utfadjustasciierror , scalaradjustasciierror ) = calculateErrorPathadjust ( start_point , processedLength , pInputBuffer , asciibytes , n4 , contbytes , contbytes ) ;
632
-
633
- Console . WriteLine ( "calculateErrorPathadjust utf16 adjustment:" + utfadjustasciierror ) ;
634
- Console . WriteLine ( "calculateErrorPathadjust scalar adjustment:" + scalaradjustasciierror ) ;
595
+ var ( utfadjustasciierror , scalaradjustasciierror ) = calculateErrorPathadjust ( start_point , processedLength , pInputBuffer , asciibytes , n4 , contbytes ) ;
635
596
636
597
utf16CodeUnitCountAdjustment = utfadjustasciierror ;
637
598
scalarCountAdjustment = scalaradjustasciierror ;
@@ -645,52 +606,30 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
645
606
utf16CodeUnitCountAdjustment += TailUtf16CodeUnitCountAdjustment ;
646
607
scalarCountAdjustment += TailScalarCodeUnitCountAdjustment ;
647
608
648
- // Console.WriteLine("--------"); //debug
649
- Console . WriteLine ( "TempUTF16 after error rewind:" + utf16CodeUnitCountAdjustment ) ;
650
- Console . WriteLine ( "TempScalar '' '' '':" + scalarCountAdjustment ) ;
651
-
652
609
return invalidBytePointer ;
653
610
}
654
611
655
- // Console.WriteLine("Doublecount(Temp) after SIMD processing:" + TempUtf16CodeUnitCountAdjustment); debug
656
- // Console.WriteLine("Scalarcount after SIMD processing:" + TempScalarCountAdjustment);
657
612
prevIncomplete = Avx2 . SubtractSaturate ( currentBlock , maxValue ) ;
658
613
659
614
if ( ! Avx2 . TestZ ( prevIncomplete , prevIncomplete ) )
660
615
{
661
616
// We have an unterminated sequence.
662
- Console . WriteLine ( "---Unterminated seq--- at " + processedLength + "bytes" ) ;
663
-
664
-
665
617
var ( totalbyteadjustment , i , tempascii , tempcont , tempn4 ) = adjustmentFactor ( pInputBuffer + processedLength + 32 ) ;
666
618
667
- Console . WriteLine ( "this is n4 adjusted by the adjustmentfactor function :" + tempn4 + " contbyte: " + contbytes ) ;
668
- 6
669
619
processedLength -= i ;
670
620
n4 += tempn4 ;
671
621
contbytes += tempcont ;
672
622
673
- lastSIMDisIncomplete = true ;
674
-
675
- // // Console.WriteLine("TempUTF16:"+ TempUtf16CodeUnitCountAdjustment);
676
- // // Console.WriteLine("TempScalar:"+ TempScalarCountAdjustment);
677
-
678
623
}
679
624
680
625
// No errors! Updating the variables we keep track of
681
626
// We use one instruction (MoveMask) to update ncon, plus one arithmetic operation.
682
627
contbytes += ( int ) Popcnt . PopCount ( ( uint ) Avx2 . MoveMask ( sc ) ) ;
683
628
684
-
685
-
686
629
// We use two instructions (SubtractSaturate and MoveMask) to update n4, with one arithmetic operation.
687
630
n4 += ( int ) Popcnt . PopCount ( ( uint ) Avx2 . MoveMask ( Avx2 . SubtractSaturate ( currentBlock , fourthByte ) ) ) ;
688
- Console . WriteLine ( "No error has been detected! Adding contbytes: " + ( int ) Popcnt . PopCount ( ( uint ) Avx2 . MoveMask ( sc ) ) + "Adding n4: " + ( int ) Popcnt . PopCount ( ( uint ) Avx2 . MoveMask ( Avx2 . SubtractSaturate ( currentBlock , fourthByte ) ) ) ) ;
689
- Console . WriteLine ( " this is the accumulated contbytes" + contbytes + " and n4:" + n4 ) ; // debug
690
631
}
691
- asciibytes += ( int ) ( 32 - Popcnt . PopCount ( ( uint ) mask ) ) ; // TODO(Nick Nuon): simplify this expression
692
-
693
-
632
+ asciibytes += ( int ) ( 32 - Popcnt . PopCount ( ( uint ) mask ) ) ;
694
633
}
695
634
696
635
// important: we just update asciibytes if there was no error.
@@ -712,7 +651,6 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
712
651
713
652
714
653
}
715
- // Console.WriteLine("-Done with SIMD part!"); //debug
716
654
// We have processed all the blocks using SIMD, we need to process the remaining bytes.
717
655
// Process the remaining bytes with the scalar function
718
656
@@ -721,11 +659,6 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
721
659
// 11110xxxx 10xxxxxx 10xxxxxx 10xxxxxx <== we might be pointing at the last byte
722
660
if ( processedLength < inputLength )
723
661
{
724
- Console . WriteLine ( "----Process remaining Scalar @ " + processedLength + "bytes" ) ;
725
- // int overlapCount = 0;
726
- // Console.WriteLine("processed length after backtrack:" + processedLength);
727
- // Console.WriteLine("TempUTF16 before tail remaining check:"+ TempUtf16CodeUnitCountAdjustment);
728
- // Console.WriteLine("TempScalar '' '' '':"+ TempScalarCountAdjustment);
729
662
byte * invalidBytePointer = SimdUnicode . UTF8 . RewindAndValidateWithErrors ( 32 , pInputBuffer + processedLength , inputLength - processedLength , ref TailUtf16CodeUnitCountAdjustment , ref TailScalarCodeUnitCountAdjustment ) ;
730
663
if ( invalidBytePointer != pInputBuffer + inputLength )
731
664
{
@@ -734,8 +667,6 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
734
667
// An invalid byte was found by the scalar function
735
668
return invalidBytePointer ;
736
669
}
737
- // Console.WriteLine("TempUTF16 after tail remaining check:"+ TempUtf16CodeUnitCountAdjustment);
738
- // Console.WriteLine("TempScalar '' '' '':"+ TempScalarCountAdjustment);
739
670
}
740
671
utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment + TailUtf16CodeUnitCountAdjustment ;
741
672
scalarCountAdjustment = TempScalarCountAdjustment + TailScalarCodeUnitCountAdjustment ;
@@ -746,7 +677,6 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
746
677
public unsafe static byte * GetPointerToFirstInvalidByteArm64 ( byte * pInputBuffer , int inputLength )
747
678
{
748
679
int processedLength = 0 ;
749
-
750
680
int TempUtf16CodeUnitCountAdjustment = 0 ;
751
681
int TempScalarCountAdjustment = 0 ;
752
682
0 commit comments