@@ -128,6 +128,160 @@ public static class UTF8
128
128
const byte OVERLONG_4 = 1 << 6 ;
129
129
const byte CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS ;
130
130
131
// Returns true when at least one bit is set in any lane of the vector.
// Uses SSE2 only: MoveMask samples just the top bit of every byte, so the
// vector is first compared against zero (all-zero lanes become 0xFF) and
// the 16-bit mask is then compared against "every lane was zero".
private static bool HasAnyBitSetSse2(Vector128<byte> value)
{
    return Sse2.MoveMask(Sse2.CompareEqual(value, Vector128<byte>.Zero)) != 0xFFFF;
}

/// <summary>
/// Validates a UTF-8 buffer using 128-bit SSE2/SSSE3 instructions for the bulk of the
/// input and the scalar validator for the remaining bytes.
/// </summary>
/// <param name="pInputBuffer">Start of the buffer to validate. May be null.</param>
/// <param name="inputLength">Number of bytes readable at <paramref name="pInputBuffer"/>.</param>
/// <returns>
/// <paramref name="pInputBuffer"/> itself when the buffer is null or the length is not positive;
/// otherwise the pointer produced by the rewind/scalar helpers when an error is found, or
/// <c>pInputBuffer + inputLength</c> when no error is found.
/// </returns>
public unsafe static byte* GetPointerToFirstInvalidByteSse(byte* pInputBuffer, int inputLength)
{
    int processedLength = 0;
    // Set when the last block handled by the SIMD loop ends in the middle of a
    // multi-byte sequence; the scalar tail must then re-examine that sequence.
    bool endsMidSequence = false;

    if (pInputBuffer == null || inputLength <= 0)
    {
        return pInputBuffer;
    }
    if (inputLength > 128)
    {
        // We skip any ASCII characters at the start of the buffer, 64 bytes at a time.
        int asciirun = 0;
        for (; asciirun + 64 <= inputLength; asciirun += 64)
        {
            Vector128<byte> block1 = Sse2.LoadVector128(pInputBuffer + asciirun);
            Vector128<byte> block2 = Sse2.LoadVector128(pInputBuffer + asciirun + 16);
            Vector128<byte> block3 = Sse2.LoadVector128(pInputBuffer + asciirun + 32);
            Vector128<byte> block4 = Sse2.LoadVector128(pInputBuffer + asciirun + 48);

            // A byte is non-ASCII exactly when its top bit is set, which is what MoveMask samples.
            Vector128<byte> or = Sse2.Or(Sse2.Or(block1, block2), Sse2.Or(block3, block4));
            if (Sse2.MoveMask(or) != 0)
            {
                break;
            }
        }
        processedLength = asciirun;

        if (processedLength + 16 < inputLength)
        {
            // We still have work to do!
            Vector128<byte> prevInputBlock = Vector128<byte>.Zero;

            // Saturating subtraction of maxValue leaves a non-zero lane only when one of the
            // last three bytes of a block is >= 0xC0, >= 0xE0 or >= 0xF0 respectively.
            Vector128<byte> maxValue = Vector128.Create(
                    255, 255, 255, 255, 255, 255, 255, 255,
                    255, 255, 255, 255, 255, 0b11110000 - 1, 0b11100000 - 1, 0b11000000 - 1);
            Vector128<byte> prevIncomplete = Sse2.SubtractSaturate(prevInputBlock, maxValue);

            // Lookup table indexed by the high nibble of the previous byte.
            Vector128<byte> shuf1 = Vector128.Create(TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
                    TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
                    TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS,
                    TOO_SHORT | OVERLONG_2,
                    TOO_SHORT,
                    TOO_SHORT | OVERLONG_3 | SURROGATE,
                    TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4);

            // Lookup table indexed by the low nibble of the previous byte.
            Vector128<byte> shuf2 = Vector128.Create(CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4,
                    CARRY | OVERLONG_2,
                    CARRY,
                    CARRY,
                    CARRY | TOO_LARGE,
                    CARRY | TOO_LARGE | TOO_LARGE_1000,
                    CARRY | TOO_LARGE | TOO_LARGE_1000,
                    CARRY | TOO_LARGE | TOO_LARGE_1000,
                    CARRY | TOO_LARGE | TOO_LARGE_1000,
                    CARRY | TOO_LARGE | TOO_LARGE_1000,
                    CARRY | TOO_LARGE | TOO_LARGE_1000,
                    CARRY | TOO_LARGE | TOO_LARGE_1000,
                    CARRY | TOO_LARGE | TOO_LARGE_1000,
                    CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE,
                    CARRY | TOO_LARGE | TOO_LARGE_1000,
                    CARRY | TOO_LARGE | TOO_LARGE_1000);

            // Lookup table indexed by the high nibble of the current byte.
            Vector128<byte> shuf3 = Vector128.Create(TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
                    TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
                    TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4,
                    TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE,
                    TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE,
                    TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE,
                    TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT);

            Vector128<byte> thirdByte = Vector128.Create((byte)(0b11100000u - 0x80));
            Vector128<byte> fourthByte = Vector128.Create((byte)(0b11110000u - 0x80));
            Vector128<byte> v0f = Vector128.Create((byte)0x0F);
            Vector128<byte> v80 = Vector128.Create((byte)0x80);

            for (; processedLength + 16 <= inputLength; processedLength += 16)
            {
                Vector128<byte> currentBlock = Sse2.LoadVector128(pInputBuffer + processedLength);

                int mask = Sse2.MoveMask(currentBlock);
                if (mask == 0)
                {
                    // We have an ASCII block, no need to process it, but
                    // we need to check if the previous block was incomplete.
                    // prevIncomplete lanes never have their top bit set, so every bit must be tested.
                    if (HasAnyBitSetSse2(prevIncomplete))
                    {
                        return SimdUnicode.UTF8.RewindAndValidateWithErrors(processedLength, pInputBuffer + processedLength, inputLength - processedLength);
                    }
                    prevIncomplete = Vector128<byte>.Zero;
                }
                else
                {
                    // Contains non-ASCII characters, we need to do non-trivial processing
                    Vector128<byte> prev1 = Ssse3.AlignRight(currentBlock, prevInputBlock, (byte)(16 - 1));
                    Vector128<byte> byte_1_high = Ssse3.Shuffle(shuf1, Sse2.ShiftRightLogical(prev1.AsUInt16(), 4).AsByte() & v0f);
                    Vector128<byte> byte_1_low = Ssse3.Shuffle(shuf2, (prev1 & v0f));
                    Vector128<byte> byte_2_high = Ssse3.Shuffle(shuf3, Sse2.ShiftRightLogical(currentBlock.AsUInt16(), 4).AsByte() & v0f);
                    Vector128<byte> sc = Sse2.And(Sse2.And(byte_1_high, byte_1_low), byte_2_high);
                    Vector128<byte> prev2 = Ssse3.AlignRight(currentBlock, prevInputBlock, (byte)(16 - 2));
                    Vector128<byte> prev3 = Ssse3.AlignRight(currentBlock, prevInputBlock, (byte)(16 - 3));
                    prevInputBlock = currentBlock;
                    Vector128<byte> isThirdByte = Sse2.SubtractSaturate(prev2, thirdByte);
                    Vector128<byte> isFourthByte = Sse2.SubtractSaturate(prev3, fourthByte);
                    Vector128<byte> must23 = Sse2.Or(isThirdByte, isFourthByte);
                    Vector128<byte> must23As80 = Sse2.And(must23, v80);
                    Vector128<byte> error = Sse2.Xor(must23As80, sc);
                    // The error flags use every bit of the lane, not only the top one,
                    // so a plain MoveMask would miss most error classes.
                    if (HasAnyBitSetSse2(error))
                    {
                        return SimdUnicode.UTF8.RewindAndValidateWithErrors(processedLength, pInputBuffer + processedLength, inputLength - processedLength);
                    }
                    prevIncomplete = Sse2.SubtractSaturate(currentBlock, maxValue);
                }
            }

            // Remember whether the final SIMD block stopped inside a multi-byte sequence.
            endsMidSequence = HasAnyBitSetSse2(prevIncomplete);
        }
    }

    // We have processed all the blocks using SIMD, we need to process the remaining bytes
    // with the scalar function. This must also happen when nothing remains but the last
    // SIMD block ended inside a sequence, otherwise a truncated sequence would be accepted.
    if (processedLength < inputLength || endsMidSequence)
    {
        if (endsMidSequence)
        {
            // The lead byte of the unfinished sequence is one of the last three processed
            // bytes (at least 16 bytes were processed, so stepping back is in bounds).
            // Step back onto it: at most one lead byte preceded by two continuation bytes.
            processedLength -= 1;
            for (int step = 0; step < 2 && (sbyte)pInputBuffer[processedLength] <= -65; step++)
            {
                processedLength -= 1;
            }
        }
        // We need to possibly backtrack to the start of the last code point
        // worst possible case is 4 bytes, where we need to backtrack 3 bytes
        // 11110xxxx 10xxxxxx 10xxxxxx 10xxxxxx <== we might be pointing at the last byte
        else if (processedLength > 0 && (sbyte)pInputBuffer[processedLength] <= -65)
        {
            processedLength -= 1;
            if (processedLength > 0 && (sbyte)pInputBuffer[processedLength] <= -65)
            {
                processedLength -= 1;
                if (processedLength > 0 && (sbyte)pInputBuffer[processedLength] <= -65)
                {
                    processedLength -= 1;
                }
            }
        }
        byte* invalidBytePointer = SimdUnicode.UTF8.GetPointerToFirstInvalidByteScalar(pInputBuffer + processedLength, inputLength - processedLength);
        if (invalidBytePointer != pInputBuffer + inputLength)
        {
            // An invalid byte was found by the scalar function
            return invalidBytePointer;
        }
    }

    return pInputBuffer + inputLength;
}
283
+
284
+
131
285
public unsafe static byte * GetPointerToFirstInvalidByteAvx2 ( byte * pInputBuffer , int inputLength )
132
286
{
133
287
int processedLength = 0 ;
@@ -461,16 +615,14 @@ public static class UTF8
461
615
{
462
616
return GetPointerToFirstInvalidByteAvx2 ( pInputBuffer , inputLength ) ;
463
617
}
464
- // TODO add support for other ISAs
465
- //if (Vector512.IsHardwareAccelerated && Avx512Vbmi2.IsSupported)
466
- //{
467
- // return GetPointerToFirstInvalidByteAvx512(pInputBuffer, inputLength);
468
- //GetPointerToFirstInvalidByteAvx2
469
- //}
470
- //if (Sse42.IsSupported)
471
- //{
472
- // return GetPointerToFirstInvalidByteSse(pInputBuffer, inputLength);
473
- //}
618
+ /*if (Vector512.IsHardwareAccelerated && Avx512Vbmi2.IsSupported)
619
+ {
620
+ return GetPointerToFirstInvalidByteAvx512(pInputBuffer, inputLength);
621
+ }*/
622
+ if ( Ssse3 . IsSupported )
623
+ {
624
+ return GetPointerToFirstInvalidByteSse ( pInputBuffer , inputLength ) ;
625
+ }
474
626
return GetPointerToFirstInvalidByteScalar ( pInputBuffer , inputLength ) ;
475
627
}
476
628
0 commit comments