@@ -141,7 +141,7 @@ public static unsafe class Utf8Utility
141
141
public static byte * GetPointerToFirstInvalidByte ( byte * pInputBuffer , int inputLength )
142
142
{
143
143
144
-
144
+ var checker = new Utf8Validation . utf8_checker ( ) ;
145
145
146
146
int processedLength = 0 ;
147
147
@@ -153,10 +153,15 @@ public static unsafe class Utf8Utility
153
153
while ( processedLength + 64 <= inputLength )
154
154
{
155
155
156
- SIMDGetPointerToFirstInvalidByte ( pInputBuffer , processedLength ) ;
157
-
158
- Utf8Validation . utf8_checker . CheckEof ( ) ;
159
- if ( Utf8Validation . utf8_checker . Errors ( ) )
156
+ // SIMDGetPointerToFirstInvalidByte(pInputBuffer,processedLength);
157
+
158
+ Vector256 < byte > currentBlock = Avx . LoadVector256 ( pInputBuffer + processedLength ) ;
159
+ checker . CheckNextInput ( currentBlock ) ;
160
+ currentBlock = Avx . LoadVector256 ( pInputBuffer + processedLength + 32 ) ; // second 32-byte half of the 64-byte chunk; the first half was checked above
161
+ checker . CheckNextInput ( currentBlock ) ;
162
+
163
+ checker . CheckEof ( ) ;
164
+ if ( checker . Errors ( ) )
160
165
{
161
166
// return pInputBuffer + processedLength;
162
167
return SimdUnicode . UTF8 . RewindAndValidateWithErrors ( pInputBuffer + processedLength , inputLength - processedLength ) ;
@@ -220,10 +225,10 @@ public static unsafe class Utf8Utility
220
225
221
226
ReadOnlySpan < Byte > remainingBytesReadOnly = remainingBytes ;
222
227
Vector256 < byte > remainingBlock = Vector256 . Create ( remainingBytesReadOnly ) ;
223
- Utf8Validation . utf8_checker . CheckNextInput ( remainingBlock ) ;
228
+ checker . CheckNextInput ( remainingBlock ) ;
224
229
225
- Utf8Validation . utf8_checker . CheckEof ( ) ;
226
- if ( Utf8Validation . utf8_checker . Errors ( ) )
230
+ checker . CheckEof ( ) ;
231
+ if ( checker . Errors ( ) )
227
232
{
228
233
// return pInputBuffer + processedLength;
229
234
return SimdUnicode . UTF8 . GetPointerToFirstInvalidByte ( pInputBuffer + processedLength , inputLength - processedLength ) ;
@@ -241,28 +246,28 @@ public static unsafe class Utf8Utility
241
246
242
247
// Returns a pointer to the first invalid byte in the input buffer if it's invalid, or a pointer to the end if it's valid.
243
248
// [MethodImpl(MethodImplOptions.AggressiveInlining)]
244
- public static byte * SIMDGetPointerToFirstInvalidByte ( byte * pInputBuffer , int processedLength )
245
- {
246
- ////////////////
247
- // TODO: I recommend taking this code and calling it something
248
- // else. Then have the current function (GetPointerToFirstInvalidByte)
249
- // call the SIMD function only if inputLength is sufficiently large (maybe 64 bytes),
250
- // otherwise, use the scalar function.
251
- ////////////////
249
+ // public static byte* SIMDGetPointerToFirstInvalidByte(byte* pInputBuffer, int processedLength)
250
+ // {
251
+ // ////////////////
252
+ // // TODO: I recommend taking this code and calling it something
253
+ // // else. Then have the current function (GetPointerToFirstInvalidByte)
254
+ // // call the SIMD function only if inputLength is sufficiently large (maybe 64 bytes),
255
+ // // otherwise, use the scalar function.
256
+ // ////////////////
252
257
253
258
254
259
255
- Vector256 < byte > currentBlock = Avx . LoadVector256 ( pInputBuffer + processedLength ) ;
256
- Utf8Validation . utf8_checker . CheckNextInput ( currentBlock ) ;
260
+ // Vector256<byte> currentBlock = Avx.LoadVector256(pInputBuffer + processedLength);
261
+ // checker .CheckNextInput(currentBlock);
257
262
258
- processedLength += 32 ;
263
+ // processedLength += 32;
259
264
260
- currentBlock = Avx . LoadVector256 ( pInputBuffer + processedLength ) ;
261
- Utf8Validation . utf8_checker . CheckNextInput ( currentBlock ) ;
262
- processedLength += 32 ;
265
+ // currentBlock = Avx.LoadVector256(pInputBuffer + processedLength);
266
+ // checker .CheckNextInput(currentBlock);
267
+ // processedLength += 32;
263
268
264
- return pInputBuffer + processedLength ;
265
- }
269
+ // return pInputBuffer + processedLength;
270
+ // }
266
271
}
267
272
268
273
// C# docs suggest that classes are allocated on the heap:
@@ -273,9 +278,9 @@ public struct utf8_checker
273
278
{
274
279
275
280
276
- static Vector256 < byte > error = Vector256 < byte > . Zero ;
277
- static Vector256 < byte > prev_input_block = Vector256 < byte > . Zero ;
278
- static Vector256 < byte > prev_incomplete = Vector256 < byte > . Zero ;
281
+ Vector256 < byte > error = Vector256 < byte > . Zero ;
282
+ Vector256 < byte > prev_input_block = Vector256 < byte > . Zero ;
283
+ Vector256 < byte > prev_incomplete = Vector256 < byte > . Zero ;
279
284
280
285
// Explicit constructor
281
286
public utf8_checker ( )
@@ -292,7 +297,7 @@ public utf8_checker()
292
297
// This is the simplest least time-consuming implementation.
293
298
[ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
294
299
295
- public static void CheckNextInput ( Vector256 < byte > input )
300
+ public void CheckNextInput ( Vector256 < byte > input )
296
301
{
297
302
// Compiles to:
298
303
/*
@@ -358,7 +363,7 @@ je G_M000_IG04
358
363
359
364
[ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
360
365
361
- public static void CheckUtf8Bytes ( Vector256 < byte > input )
366
+ public void CheckUtf8Bytes ( Vector256 < byte > input )
362
367
{
363
368
// compiles to
364
369
// vmovups ymm0, ymmword ptr [rcx]
@@ -399,7 +404,7 @@ public static void CheckUtf8Bytes(Vector256<byte> input)
399
404
400
405
// [MethodImpl(MethodImplOptions.AggressiveInlining)]
401
406
402
- public static bool Errors ( )
407
+ public bool Errors ( )
403
408
{
404
409
// Console.WriteLine("Error Vector at the end: " + VectorToString(error));
405
410
// compiles to:
@@ -411,7 +416,7 @@ public static bool Errors()
411
416
412
417
// [MethodImpl(MethodImplOptions.AggressiveInlining)]
413
418
414
- public static void CheckEof ( )
419
+ public void CheckEof ( )
415
420
{
416
421
// Console.WriteLine("Error Vector before check_eof(): " + VectorToString(error));
417
422
// Console.WriteLine("prev_incomplete Vector in check_eof(): " + VectorToString(prev_incomplete));
@@ -437,7 +442,7 @@ public static void CheckEof()
437
442
[ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
438
443
439
444
// This corresponds to section 6.1 e.g Table 6 of the paper e.g. 1-2 bytes
440
- private static Vector256 < byte > CheckSpecialCases ( Vector256 < byte > input , Vector256 < byte > prev1 )
445
+ private Vector256 < byte > CheckSpecialCases ( Vector256 < byte > input , Vector256 < byte > prev1 )
441
446
{
442
447
443
448
// define bits that indicate error code
@@ -533,7 +538,7 @@ private static Vector256<byte> CheckSpecialCases(Vector256<byte> input, Vector25
533
538
}
534
539
535
540
[ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
536
- private static Vector256 < byte > CheckMultibyteLengths ( Vector256 < byte > input , Vector256 < byte > prev_input , Vector256 < byte > sc )
541
+ private Vector256 < byte > CheckMultibyteLengths ( Vector256 < byte > input , Vector256 < byte > prev_input , Vector256 < byte > sc )
537
542
{
538
543
// Console.WriteLine("sc: " + VectorToString(sc));
539
544
@@ -564,7 +569,7 @@ private static Vector256<byte> CheckMultibyteLengths(Vector256<byte> input, Vect
564
569
}
565
570
566
571
[ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
567
- private static Vector256 < byte > MustBe23Continuation ( Vector256 < byte > prev2 , Vector256 < byte > prev3 )
572
+ private Vector256 < byte > MustBe23Continuation ( Vector256 < byte > prev2 , Vector256 < byte > prev3 )
568
573
{
569
574
// Compiles to
570
575
// vmovups ymm0, ymmword ptr [rdx]
@@ -598,7 +603,7 @@ private static Vector256<byte> MustBe23Continuation(Vector256<byte> prev2, Vecto
598
603
599
604
[ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
600
605
601
- private static Vector256 < byte > IsIncomplete ( Vector256 < byte > input )
606
+ private Vector256 < byte > IsIncomplete ( Vector256 < byte > input )
602
607
{
603
608
// Console.WriteLine("Input Vector is_incomplete: " + VectorToString(input));
604
609
// byte[] maxArray = new byte[32]
@@ -624,7 +629,7 @@ private static Vector256<byte> IsIncomplete(Vector256<byte> input)
624
629
625
630
[ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
626
631
627
- private static Vector256 < byte > SaturatingSubtractUnsigned ( Vector256 < byte > left , Vector256 < byte > right )
632
+ private Vector256 < byte > SaturatingSubtractUnsigned ( Vector256 < byte > left , Vector256 < byte > right )
628
633
{
629
634
// Compiles to
630
635
// vpsubusw ymm0, ymm0, ymmword ptr [r8]
0 commit comments