@@ -10,6 +10,70 @@ namespace SimdUnicode
10
10
public static class UTF8
11
11
{
12
12
13
+ // Debugging helper: dumps `bytes` to the console twice, first as two-digit hex and then as 8-digit binary, 16 bytes per row. The byte at highlightIndex is printed in red; otherwise every 32nd byte (index 0, 32, 64, ...) is printed in green. With the default highlightIndex of -1 no byte is printed in red.
14
+ static void PrintHexAndBinary ( byte [ ] bytes , int highlightIndex = - 1 )
15
+ {
16
+ int chunkSize = 16 ; // 128 bits = 16 bytes per output row
17
+
18
+ // First pass: print every byte in hexadecimal
19
+ Console . Write ( "Hex: " ) ;
20
+ for ( int i = 0 ; i < bytes . Length ; i ++ )
21
+ {
22
+ if ( i > 0 && i % chunkSize == 0 )
23
+ Console . WriteLine ( ) ; // New line after every 16 bytes
24
+
25
+ if ( i == highlightIndex )
26
+ {
27
+ Console . ForegroundColor = ConsoleColor . Red ;
28
+ Console . Write ( $ "{ bytes [ i ] : X2} ") ;
29
+ Console . ResetColor ( ) ;
30
+ }
31
+ else if ( i % ( chunkSize * 2 ) == 0 ) // print green every 32 bytes (chunkSize * 2 = 256 bits); the red highlight above takes precedence
32
+ {
33
+ Console . ForegroundColor = ConsoleColor . Green ;
34
+ Console . Write ( $ "{ bytes [ i ] : X2} ") ;
35
+ Console . ResetColor ( ) ;
36
+ }
37
+ else
38
+ {
39
+ Console . Write ( $ "{ bytes [ i ] : X2} ") ;
40
+ }
41
+
42
+ if ( ( i + 1 ) % chunkSize != 0 ) Console . Write ( " " ) ; // Add space between bytes but not at the end of the line
43
+ }
44
+ Console . WriteLine ( "\n " ) ; // New line for readability and to separate hex from binary
45
+
46
+ // Second pass: print every byte in binary, using the same row layout and colouring rules
47
+ Console . Write ( "Binary: " ) ;
48
+ for ( int i = 0 ; i < bytes . Length ; i ++ )
49
+ {
50
+ if ( i > 0 && i % chunkSize == 0 )
51
+ Console . WriteLine ( ) ; // New line after every 16 bytes
52
+
53
+ string binaryString = Convert . ToString ( bytes [ i ] , 2 ) . PadLeft ( 8 , '0' ) ;
54
+ if ( i == highlightIndex )
55
+ {
56
+ Console . ForegroundColor = ConsoleColor . Red ;
57
+ Console . Write ( $ "{ binaryString } ") ;
58
+ Console . ResetColor ( ) ;
59
+ }
60
+ else if ( i % ( chunkSize * 2 ) == 0 ) // print green every 32 bytes (chunkSize * 2 = 256 bits); the red highlight above takes precedence
61
+ {
62
+ Console . ForegroundColor = ConsoleColor . Green ;
63
+ Console . Write ( $ "{ binaryString } ") ;
64
+ Console . ResetColor ( ) ;
65
+ }
66
+ else
67
+ {
68
+ Console . Write ( $ "{ binaryString } ") ;
69
+ }
70
+
71
+ if ( ( i + 1 ) % chunkSize != 0 ) Console . Write ( " " ) ; // Add space between bytes but not at the end of the line
72
+ }
73
+ Console . WriteLine ( ) ; // New line for readability
74
+ }
75
+
76
+
13
77
static Func < byte , string > byteToBinaryString = b => Convert . ToString ( b , 2 ) . PadLeft ( 8 , '0' ) ; // debugging helper: formats a byte as an 8-character binary string, left-padded with '0'
14
78
15
79
public unsafe static byte * RewindAndValidateWithErrors ( int howFarBack , byte * buf , int len , ref int utf16CodeUnitCountAdjustment , ref int scalarCountAdjustment )
@@ -188,38 +252,40 @@ public static class UTF8
188
252
// Looks backward from pInputBuffer (at offsets -1 .. -4) for the first byte that is not a
// continuation byte (10xxxxxx), then classifies that byte by its leading bits.
// Returns a tuple (totalbyteadjustment, backedupByHowMuch, ascii, contbyte, n4) where:
//   backedupByHowMuch is the offset i at which the scan stopped,
//   contbyte is minus the number of continuation bytes stepped over,
//   ascii is -1 only when the byte found is ASCII, and n4 is -1 only when it falls through to the 4-byte-leader case,
//   totalbyteadjustment is (sequence length implied by the leading byte) - i, or 0 for ASCII.
public unsafe static ( int totalbyteadjustment , int backedupByHowMuch , int ascii , int contbyte , int n4 ) adjustmentFactor ( byte * pInputBuffer ) {
189
253
// Find the first non-continuation byte, working backward.
190
254
int i = 1 ;
255
+ int contbyteadjust = 0 ; // decremented once per continuation byte stepped over in the loop below
191
256
for ( ; i <= 4 ; i ++ )
192
257
{
193
258
if ( ( pInputBuffer [ - i ] & 0b11000000 ) != 0b10000000 )
194
259
{
195
260
break ;
196
261
}
262
+ contbyteadjust -= 1 ; // pInputBuffer[-i] is a continuation byte (10xxxxxx)
263
+
197
264
} // NOTE(review): if all four bytes are continuation bytes the loop ends with i == 5 and pInputBuffer[-5] is read below — confirm callers rule this out
198
265
if ( ( pInputBuffer [ - i ] & 0b10000000 ) == 0 ) {
199
- return ( 0 , i , - 1 , 0 , 0 ) ; // We must have that i == 1
266
+ return ( 0 , i , - 1 , contbyteadjust , 0 ) ; // We must have that i == 1
200
267
}
201
268
if ( ( pInputBuffer [ - i ] & 0b11100000 ) == 0b11000000 ) {
202
- return ( 2 - i , i , 0 , 0 , 0 ) ; // We have that i == 1 or i == 2, if i == 1, we are missing one byte.
269
+ return ( 2 - i , i , 0 , contbyteadjust , 0 ) ; // We have that i == 1 or i == 2, if i == 1, we are missing one byte.
203
270
}
204
271
if ( ( pInputBuffer [ - i ] & 0b11110000 ) == 0b11100000 ) {
205
- return ( 3 - i , i , 0 , 0 , 0 ) ; // We have that i == 1 or i == 2 or i == 3, if i == 1, we are missing two bytes, if i == 2, we are missing one byte.
272
+ return ( 3 - i , i , 0 , contbyteadjust , 0 ) ; // We have that i == 1 or i == 2 or i == 3, if i == 1, we are missing two bytes, if i == 2, we are missing one byte.
206
273
}
207
274
// We must have that (pInputBuffer[-i] & 0b11111000) == 0b11110000
208
- return ( 4 - i , i , 0 , 0 , - 1 ) ; // We have that i == 1 or i == 2 or i == 3 or i == 4, if i == 1, we are missing three bytes, if i == 2, we are missing two bytes, if i == 3, we are missing one byte.
275
+ return ( 4 - i , i , 0 , contbyteadjust , - 1 ) ; // We have that i == 1 or i == 2 or i == 3 or i == 4, if i == 1, we are missing three bytes, if i == 2, we are missing two bytes, if i == 3, we are missing one byte.
209
276
}
210
277
211
278
// Derives the pair (utfadjust, scalaradjust) from four byte counters:
// asciibytes, n4, contbytes and totalbyte. n3 and n2 are computed as linear
// combinations of those counters, then utfadjust = -2*n4 - 2*n3 - n2 and scalaradjust = -n4.
// NOTE(review): the Console.WriteLine calls below run unconditionally on every call; they are marked as debug output.
public static ( int utfadjust , int scalaradjust ) CalculateN2N3FinalSIMDAdjustments ( int asciibytes , int n4 , int contbytes , int totalbyte )
212
279
{
213
- Console . WriteLine ( "---------" ) ;
214
- Console . WriteLine ( "CalculateN2N3FinalSIMDAdjustments's input debug. This is ascii count:" + asciibytes + " n4: " + n4 + " contbytes:" + contbytes + " totalbytes:" + totalbyte ) ;
280
+ Console . WriteLine ( "---------" ) ; // debug: separator line
281
+ Console . WriteLine ( "CalculateN2N3FinalSIMDAdjustments's input debug. This is ascii count:" + asciibytes + " n4: " + n4 + " contbytes:" + contbytes + " totalbytes:" + totalbyte ) ; // debug: dump the four inputs
215
282
int n3 = asciibytes - 2 * n4 + 2 * contbytes - totalbyte ; // presumably the number of 3-byte sequences — TODO confirm derivation
216
283
int n2 = - 2 * asciibytes + n4 - 3 * contbytes + 2 * totalbyte ; // presumably the number of 2-byte sequences — TODO confirm derivation
217
284
int utfadjust = - 2 * n4 - 2 * n3 - n2 ;
218
285
int scalaradjust = - n4 ;
219
286
220
- Console . WriteLine ( "CalculateN2N3FinalSIMDAdjustments's output debug. This is n3 count:" + n3 + " n2: " + n2 + " utfadjust:" + utfadjust + " scalaradjust:" + scalaradjust ) ;
287
+ Console . WriteLine ( "CalculateN2N3FinalSIMDAdjustments's output debug. This is n3 count:" + n3 + " n2: " + n2 + " utfadjust:" + utfadjust + " scalaradjust:" + scalaradjust ) ; // debug: dump the derived values
221
288
222
-
223
289
return ( utfadjust , scalaradjust ) ;
224
290
}
225
291
@@ -395,7 +461,10 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
395
461
396
462
public unsafe static byte * GetPointerToFirstInvalidByteAvx2 ( byte * pInputBuffer , int inputLength , out int utf16CodeUnitCountAdjustment , out int scalarCountAdjustment )
397
463
{
398
- Console . WriteLine ( "-------------------------------------" ) ;
464
+ Console . ForegroundColor = ConsoleColor . Blue ; //debug
465
+ Console . WriteLine ( "-------------------------------------" ) ; //debug
466
+ Console . ResetColor ( ) ; //debug
467
+
399
468
int processedLength = 0 ;
400
469
int TempUtf16CodeUnitCountAdjustment = 0 ;
401
470
int TempScalarCountAdjustment = 0 ;
@@ -568,10 +637,17 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
568
637
Vector256 < byte > prev1 = Avx2 . AlignRight ( prevInputBlock , shuffled , ( byte ) ( 16 - 1 ) ) ;
569
638
// Vector256.Shuffle vs Avx2.Shuffle
570
639
// https://github.com/dotnet/runtime/blob/1400c1e7a888ea1e710e5c08d55c800e0b04bf8a/docs/coding-guidelines/vectorization-guidelines.md#vector256shuffle-vs-avx2shuffle
571
- Vector256 < byte > byte_1_high = Avx2 . Shuffle ( shuf1 , Avx2 . ShiftRightLogical ( prev1 . AsUInt16 ( ) , 4 ) . AsByte ( ) & v0f ) ;
572
- Vector256 < byte > byte_1_low = Avx2 . Shuffle ( shuf2 , ( prev1 & v0f ) ) ;
573
- Vector256 < byte > byte_2_high = Avx2 . Shuffle ( shuf3 , Avx2 . ShiftRightLogical ( currentBlock . AsUInt16 ( ) , 4 ) . AsByte ( ) & v0f ) ;
640
+ Vector256 < byte > byte_1_high = Avx2 . Shuffle ( shuf1 , Avx2 . ShiftRightLogical ( prev1 . AsUInt16 ( ) , 4 ) . AsByte ( ) & v0f ) ; // takes the XXXX 0000 part of the previous byte
641
+ Vector256 < byte > byte_1_low = Avx2 . Shuffle ( shuf2 , ( prev1 & v0f ) ) ; // takes the 0000 XXXX part of the previous part
642
+ Vector256 < byte > byte_2_high = Avx2 . Shuffle ( shuf3 , Avx2 . ShiftRightLogical ( currentBlock . AsUInt16 ( ) , 4 ) . AsByte ( ) & v0f ) ; // takes the XXXX 0000 part of the current byte
574
643
Vector256 < byte > sc = Avx2 . And ( Avx2 . And ( byte_1_high , byte_1_low ) , byte_2_high ) ;
644
+
645
+ // Create a span from the Vector256<byte>
646
+ // Console.WriteLine("");
647
+ // Span<byte> byteSpan = MemoryMarshal.Cast<Vector256<byte>, byte>(MemoryMarshal.CreateSpan(ref sc, 1));
648
+ // byte[] scbytes = byteSpan.ToArray();
649
+ // PrintHexAndBinary(scbytes);
650
+
575
651
Vector256 < byte > prev2 = Avx2 . AlignRight ( prevInputBlock , shuffled , ( byte ) ( 16 - 2 ) ) ;
576
652
Vector256 < byte > prev3 = Avx2 . AlignRight ( prevInputBlock , shuffled , ( byte ) ( 16 - 3 ) ) ;
577
653
Vector256 < byte > isThirdByte = Avx2 . SubtractSaturate ( prev2 , thirdByte ) ;
@@ -581,7 +657,7 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
581
657
Vector256 < byte > error = Avx2 . Xor ( must23As80 , sc ) ;
582
658
if ( ! Avx2 . TestZ ( error , error ) )
583
659
{
584
- Console . WriteLine ( "--Error!" ) ;
660
+ Console . WriteLine ( $ "--Error! @ { processedLength } bytes ") ; //debug
585
661
int totalbyteasciierror = processedLength - start_point ;
586
662
var ( utfadjustasciierror , scalaradjustasciierror ) = calculateErrorPathadjust ( start_point , processedLength , pInputBuffer , asciibytes , n4 , contbytes ) ;
587
663
@@ -608,25 +684,53 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
608
684
var ( totalbyteadjustment , i , tempascii , tempcont , tempn4 ) = adjustmentFactor ( pInputBuffer + processedLength + 32 ) ;
609
685
610
686
processedLength -= i ;
611
- n4 += tempn4 ;
687
+ n4 += tempn4 ; // this is + because the adjustment function returns something negative already
612
688
contbytes += tempcont ;
613
- Console . WriteLine ( $ "Unterminated! Backing up by { i } ") ;
614
-
689
+ Console . WriteLine ( $ "Unterminated! @ { processedLength } Backing up by { i } ") ; //debug
615
690
}
616
691
692
+
693
+
694
+
695
+
696
+ // Vector256<byte> contbyto = Vector256.Create((byte)(0b11000000u - 0x80));
697
+ // Vector256<byte> isStartOf4ByteSequence = Avx2.SubtractSaturate(currentBlock, fourthByte);
698
+ // Vector256<byte> isStartOf3OrMoreByteSequence = Avx2.SubtractSaturate(currentBlock, thirdByte);
699
+ // Vector256<byte> isStartOf2OrMoreByteSequence = Avx2.SubtractSaturate(currentBlock, secondByte);
700
+
701
+ // uint twoBytePlusCount = Popcnt.PopCount((uint)Avx2.MoveMask(isStartOf2OrMoreByteSequence));
702
+ // uint threeBytePlusCount = Popcnt.PopCount((uint)Avx2.MoveMask(isStartOf3OrMoreByteSequence));
703
+ // uint fourByteCount = Popcnt.PopCount((uint)Avx2.MoveMask(isStartOf4ByteSequence));
704
+
705
+
617
706
// No errors! Updating the variables we keep track of
618
707
// We use one instruction (MoveMask) to update ncon, plus one arithmetic operation.
619
- contbytes += ( int ) Popcnt . PopCount ( ( uint ) Avx2 . MoveMask ( sc ) ) ;
708
+
709
+ // contbytes += (int)Popcnt.PopCount((uint)Avx2.MoveMask(sc)); // this actually counts the number of 2 consecutive continuation bytes
710
+ // Placeholder until another way to deal with contbyte is found
711
+
712
+ Vector256 < byte > top2bits = Vector256 . Create ( ( byte ) 0b11000000 ) ; // Mask to isolate the two most significant bits
713
+ Vector256 < byte > contbytemask = Vector256 . Create ( ( byte ) 0b10000000 ) ; // The expected pattern for continuation bytes: 10xxxxxx
620
714
715
+ // Apply the mask and compare
716
+ Vector256 < byte > maskedData = Avx2 . And ( currentBlock , top2bits ) ;
717
+ Vector256 < byte > compareResult = Avx2 . CompareEqual ( maskedData , contbytemask ) ;
718
+ // Move mask to get integer representation
719
+ contbytes += ( int ) Popcnt . PopCount ( ( uint ) Avx2 . MoveMask ( compareResult ) ) ;
720
+
721
+
722
+
621
723
// We use two instructions (SubtractSaturate and MoveMask) to update n4, with one arithmetic operation.
622
724
n4 += ( int ) Popcnt . PopCount ( ( uint ) Avx2 . MoveMask ( Avx2 . SubtractSaturate ( currentBlock , fourthByte ) ) ) ;
623
725
}
726
+
727
+ // important: we just update asciibytes if there was no error.
728
+ // We count the number of ascii bytes in the block using just some simple arithmetic
729
+ // and no expensive operation:
624
730
asciibytes += ( int ) ( 32 - Popcnt . PopCount ( ( uint ) mask ) ) ;
625
731
}
626
732
627
- // important: we just update asciibytes if there was no error.
628
- // We count the number of ascii bytes in the block using just some simple arithmetic
629
- // and no expensive operation:
733
+
630
734
631
735
632
736
// There are 2 possible scenarios here : either
0 commit comments