2
2
using System . Runtime . Intrinsics ;
3
3
using System . Runtime . Intrinsics . X86 ;
4
4
using System . Linq ;
5
+ using System . Runtime . CompilerServices ;
5
6
6
7
7
8
// C# already has something that is *more or less* equivalent to our C++ simd class:
11
12
public static class Vector256Extensions
12
13
{
13
14
// Takes the second lane of the current vector and the first lane of the previous vector, shifts the result right by an appropriate number of bytes (fewer than 16, i.e. fewer than 128 bits), and returns it.
14
- // Checked
15
+ [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
16
+
15
17
public static Vector256 < byte > Prev ( this Vector256 < byte > current , Vector256 < byte > prev , int N = 1 )
16
18
{
17
19
@@ -85,6 +87,7 @@ public static unsafe class Utf8Utility
85
87
86
88
87
89
// Returns a pointer to the first invalid byte in the input buffer if it's invalid, or a pointer to the end if it's valid.
90
+ // [MethodImpl(MethodImplOptions.AggressiveInlining)]
88
91
public static byte * GetPointerToFirstInvalidByte ( byte * pInputBuffer , int inputLength )
89
92
{
90
93
if ( pInputBuffer == null || inputLength <= 0 )
@@ -148,6 +151,7 @@ public utf8_checker()
148
151
// The original C++ implementation is much more extensive and assumes a 512 bit stream as well as several implementations
149
152
// In this case I focus solely on AVX2 instructions for prototyping and benchmarking purposes.
150
153
// This is the simplest, least time-consuming implementation.
154
+ [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
151
155
152
156
public void check_next_input ( Vector256 < byte > input )
153
157
{
@@ -159,7 +163,6 @@ public void check_next_input(Vector256<byte> input)
159
163
// Contains non-ASCII characters, process the vector
160
164
check_utf8_bytes ( input , prev_input_block ) ;
161
165
prev_incomplete = is_incomplete ( input ) ;
162
-
163
166
}
164
167
165
168
@@ -170,7 +173,8 @@ public void check_next_input(Vector256<byte> input)
170
173
171
174
}
172
175
173
- // Checked
176
+ [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
177
+
174
178
public void check_utf8_bytes ( Vector256 < byte > input , Vector256 < byte > prev_input )
175
179
{
176
180
Vector256 < byte > prev1 = input . Prev ( prev_input , 1 ) ;
@@ -185,6 +189,7 @@ public void check_utf8_bytes(Vector256<byte> input, Vector256<byte> prev_input)
185
189
186
190
}
187
191
192
+ // [MethodImpl(MethodImplOptions.AggressiveInlining)]
188
193
189
194
public bool errors ( )
190
195
{
@@ -193,6 +198,8 @@ public bool errors()
193
198
return ! Avx2 . TestZ ( error , error ) ;
194
199
}
195
200
201
+ // [MethodImpl(MethodImplOptions.AggressiveInlining)]
202
+
196
203
public void check_eof ( )
197
204
{
198
205
// Console.WriteLine("Error Vector before check_eof(): " + VectorToString(error));
@@ -203,6 +210,8 @@ public void check_eof()
203
210
204
211
}
205
212
213
+ [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
214
+
206
215
// This corresponds to section 6.1 of the paper (see e.g. Table 6), e.g. 1-2 bytes
207
216
private Vector256 < byte > check_special_cases ( Vector256 < byte > input , Vector256 < byte > prev1 )
208
217
{
@@ -267,6 +276,8 @@ private Vector256<byte> check_special_cases(Vector256<byte> input, Vector256<byt
267
276
return Avx2 . And ( Avx2 . And ( byte_1_high , byte_1_low ) , byte_2_high ) ;
268
277
}
269
278
279
+ [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
280
+
270
281
private Vector256 < byte > check_multibyte_lengths ( Vector256 < byte > input , Vector256 < byte > prev_input , Vector256 < byte > sc )
271
282
{
272
283
// Console.WriteLine("sc: " + VectorToString(sc));
@@ -289,6 +300,8 @@ private Vector256<byte> check_multibyte_lengths(Vector256<byte> input, Vector256
289
300
return Avx2 . Xor ( must23_80 , sc ) ;
290
301
}
291
302
303
+ [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
304
+
292
305
private Vector256 < byte > must_be_2_3_continuation ( Vector256 < byte > prev2 , Vector256 < byte > prev3 )
293
306
{
294
307
Vector256 < byte > is_third_byte = Avx2 . SubtractSaturate ( prev2 , Vector256 . Create ( ( byte ) ( 0b11100000u - 1 ) ) ) ;
@@ -303,6 +316,8 @@ private Vector256<byte> must_be_2_3_continuation(Vector256<byte> prev2, Vector25
303
316
return comparisonResult . AsByte ( ) ;
304
317
}
305
318
319
+ [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
320
+
306
321
private Vector256 < byte > is_incomplete ( Vector256 < byte > input )
307
322
{
308
323
// Console.WriteLine("Input Vector is_incomplete: " + VectorToString(input));
@@ -321,6 +336,8 @@ private Vector256<byte> is_incomplete(Vector256<byte> input)
321
336
return result ;
322
337
}
323
338
339
+ [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
340
+
324
341
private Vector256 < byte > SaturatingSubtractUnsigned ( Vector256 < byte > left , Vector256 < byte > right )
325
342
{
326
343
if ( ! Avx2 . IsSupported )
@@ -336,6 +353,7 @@ private Vector256<byte> SaturatingSubtractUnsigned(Vector256<byte> left, Vector2
336
353
return subtractionResult . AsByte ( ) ;
337
354
}
338
355
356
+
339
357
// Helper functions for debugging
340
358
private string VectorToString ( Vector256 < byte > vector )
341
359
{
0 commit comments