@@ -165,7 +165,8 @@ public static unsafe class Utf8Utility
165
165
166
166
}
167
167
168
- // First fix bencrmarks static utf checker
168
+ // First fix benchmarks static utf checker
169
+ //
169
170
// | Method | FileName | Mean | Error | StdDev | Allocated |
170
171
// |---------------------------- |----------------------- |-----------:|----------:|-----------:|----------:|
171
172
// | SIMDUtf8ValidationRealData | data/arabic.utf8.txt | 478.655 us | 8.9312 us | 15.4059 us | - |
@@ -212,24 +213,63 @@ public static unsafe class Utf8Utility
212
213
// | SIMDUtf8ValidationErrorData | data/japanese.utf8.txt | 75.751 us | 0.9603 us | 0.7498 us | - |
213
214
// | SIMDUtf8ValidationRealData | data/turkish.utf8.txt | 173.199 us | 3.4289 us | 5.4386 us | - |
214
215
// | SIMDUtf8ValidationErrorData | data/turkish.utf8.txt | 112.989 us | 1.7684 us | 1.5677 us | - |
216
+ // if (processedLength < inputLength)
217
+ // {
218
+
219
+ // Span<byte> remainingBytes = stackalloc byte[64];
220
+ // new Span<byte>(pInputBuffer + processedLength, inputLength - processedLength).CopyTo(remainingBytes);
221
+
222
+ // ReadOnlySpan<Byte> remainingBytesReadOnly = remainingBytes;
223
+ // Vector256<byte> remainingBlock = Vector256.Create(remainingBytesReadOnly);
224
+ // Utf8Validation.utf8_checker.CheckNextInput(remainingBlock);
225
+
226
+ // Utf8Validation.utf8_checker.CheckEof();
227
+ // if (Utf8Validation.utf8_checker.Errors())
228
+ // {
229
+ // // return pInputBuffer + processedLength;
230
+ // return SimdUnicode.UTF8.GetPointerToFirstInvalidByte(pInputBuffer + processedLength,inputLength - processedLength);
231
+ // }
232
+ // processedLength += inputLength - processedLength;
233
+
234
+ // }
235
+
236
+ // | Method | FileName | Mean | Error | StdDev | Allocated |
237
+ // |---------------------------- |----------------------- |-----------:|----------:|----------:|----------:|
238
+ // | SIMDUtf8ValidationRealData | data/arabic.utf8.txt | 454.353 us | 6.0327 us | 5.3478 us | - |
239
+ // | SIMDUtf8ValidationErrorData | data/arabic.utf8.txt | 278.734 us | 5.3031 us | 5.8943 us | - |
240
+ // | SIMDUtf8ValidationRealData | data/chinese.utf8.txt | 127.542 us | 2.2544 us | 2.1087 us | - |
241
+ // | SIMDUtf8ValidationErrorData | data/chinese.utf8.txt | 15.822 us | 0.3030 us | 0.3832 us | - |
242
+ // | SIMDUtf8ValidationRealData | data/english.utf8.txt | 11.016 us | 0.1309 us | 0.1225 us | - |
243
+ // | SIMDUtf8ValidationErrorData | data/english.utf8.txt | 11.030 us | 0.1580 us | 0.1400 us | - |
244
+ // | SIMDUtf8ValidationRealData | data/french.utf8.txt | 12.547 us | 0.0740 us | 0.0656 us | - |
245
+ // | SIMDUtf8ValidationErrorData | data/french.utf8.txt | 12.652 us | 0.1455 us | 0.1290 us | - |
246
+ // | SIMDUtf8ValidationRealData | data/german.utf8.txt | 5.755 us | 0.0277 us | 0.0246 us | - |
247
+ // | SIMDUtf8ValidationErrorData | data/german.utf8.txt | 5.669 us | 0.0079 us | 0.0070 us | - |
248
+ // | SIMDUtf8ValidationRealData | data/japanese.utf8.txt | 130.835 us | 0.5999 us | 0.5612 us | - |
249
+ // | SIMDUtf8ValidationErrorData | data/japanese.utf8.txt | 71.814 us | 1.0399 us | 0.9727 us | - |
250
+ // | SIMDUtf8ValidationRealData | data/turkish.utf8.txt | 167.163 us | 3.1610 us | 4.1103 us | - |
251
+ // | SIMDUtf8ValidationErrorData | data/turkish.utf8.txt | 109.607 us | 0.6636 us | 0.5542 us | - |
252
+
253
+
215
254
if ( processedLength < inputLength )
216
255
{
217
256
218
257
Span < byte > remainingBytes = stackalloc byte [ 64 ] ;
219
- new Span < byte > ( pInputBuffer + processedLength , inputLength - processedLength ) . CopyTo ( remainingBytes ) ;
258
+ for ( int i = 0 ; i < inputLength - processedLength ; i ++ )
259
+ {
260
+ remainingBytes [ i ] = pInputBuffer [ processedLength + i ] ;
261
+ }
220
262
221
263
ReadOnlySpan < Byte > remainingBytesReadOnly = remainingBytes ;
222
264
Vector256 < byte > remainingBlock = Vector256 . Create ( remainingBytesReadOnly ) ;
223
265
Utf8Validation . utf8_checker . CheckNextInput ( remainingBlock ) ;
224
-
225
266
Utf8Validation . utf8_checker . CheckEof ( ) ;
226
267
if ( Utf8Validation . utf8_checker . Errors ( ) )
227
268
{
228
269
// return pInputBuffer + processedLength;
229
270
return SimdUnicode . UTF8 . GetPointerToFirstInvalidByte ( pInputBuffer + processedLength , inputLength - processedLength ) ;
230
271
}
231
272
processedLength += inputLength - processedLength ;
232
-
233
273
}
234
274
235
275
@@ -239,7 +279,7 @@ public static unsafe class Utf8Utility
239
279
240
280
}
241
281
242
- // Returns a pointer to the first invalid byte in the input buffer if it's invalid, or a pointer to the end if it's valid.
282
+ // Returns a pointer to the first invalid byte in the input buffer if it's invalid, or a pointer to the end if it's valid.
243
283
// [MethodImpl(MethodImplOptions.AggressiveInlining)]
244
284
public static byte * SIMDGetPointerToFirstInvalidByte ( byte * pInputBuffer , int processedLength )
245
285
{
0 commit comments