@@ -154,27 +154,64 @@ public static unsafe class Utf8Utility
154
154
155
155
// Helpers.CheckForGCCollections("After AVX2 procession");
156
156
157
- if ( processedLength < inputLength )
158
- {
159
- // Unfortunalely, this approach with stackalloc might be expensive.
160
- // TODO: replace it by a simple scalar routine. You need to handle
161
- // prev_incomplete but it should be doable.
157
+
158
+ // | Method | FileName | Mean | Error | StdDev | Allocated |
159
+ // |---------------------------- |----------------------- |----------:|----------:|----------:|----------:|
160
+ // | SIMDUtf8ValidationRealData | data/arabic.utf8.txt | 33.062 us | 0.5046 us | 0.4720 us | 56 B |
161
+ // | SIMDUtf8ValidationErrorData | data/arabic.utf8.txt | 35.609 us | 0.3369 us | 0.3152 us | 56 B |
162
+ // | SIMDUtf8ValidationRealData | data/chinese.utf8.txt | 11.603 us | 0.2232 us | 0.2293 us | 56 B |
163
+ // | SIMDUtf8ValidationErrorData | data/chinese.utf8.txt | 12.317 us | 0.1826 us | 0.1708 us | 56 B |
164
+ // | SIMDUtf8ValidationRealData | data/english.utf8.txt | 13.726 us | 0.2471 us | 0.2311 us | 56 B |
165
+ // | SIMDUtf8ValidationErrorData | data/english.utf8.txt | 13.392 us | 0.0520 us | 0.0487 us | 56 B |
166
+ // | SIMDUtf8ValidationRealData | data/french.utf8.txt | 24.345 us | 0.2012 us | 0.1882 us | 56 B |
167
+ // | SIMDUtf8ValidationErrorData | data/french.utf8.txt | 23.778 us | 0.1892 us | 0.1769 us | 56 B |
168
+ // | SIMDUtf8ValidationRealData | data/german.utf8.txt | 9.323 us | 0.0155 us | 0.0130 us | 56 B |
169
+ // | SIMDUtf8ValidationErrorData | data/german.utf8.txt | 8.336 us | 0.0502 us | 0.0470 us | 56 B |
170
+ // | SIMDUtf8ValidationRealData | data/japanese.utf8.txt | 10.728 us | 0.1370 us | 0.1282 us | 56 B |
171
+ // | SIMDUtf8ValidationErrorData | data/japanese.utf8.txt | 10.837 us | 0.1389 us | 0.1300 us | 56 B |
172
+ // | SIMDUtf8ValidationRealData | data/turkish.utf8.txt | 11.086 us | 0.1190 us | 0.1113 us | 56 B |
173
+ // | SIMDUtf8ValidationErrorData | data/turkish.utf8.txt | 10.017 us | 0.0615 us | 0.0514 us | 56 B |
162
174
163
- Span < byte > remainingBytes = stackalloc byte [ 32 ] ;
164
- for ( int i = 0 ; i < inputLength - processedLength ; i ++ )
165
- {
166
- remainingBytes [ i ] = pInputBuffer [ processedLength + i ] ;
167
- }
168
175
169
- Vector256 < byte > remainingBlock = Vector256 . Create ( remainingBytes . ToArray ( ) ) ;
170
- Utf8Validation . utf8_checker . CheckNextInput ( remainingBlock , ref prev_input_block , ref prev_incomplete , ref error ) ;
171
- processedLength += inputLength - processedLength ;
176
+ // if (processedLength < inputLength)
177
+ // {
178
+ // // Unfortunately, this approach with stackalloc might be expensive.
179
+ // // TODO: replace it by a simple scalar routine. You need to handle
180
+ // // prev_incomplete but it should be doable.
172
181
173
- }
174
182
175
- // CheckForGCCollections("After processed remaining bytes");
183
+ // Span<byte> remainingBytes = stackalloc byte[32];
184
+ // for (int i = 0; i < inputLength - processedLength; i++)
185
+ // {
186
+ // remainingBytes[i] = pInputBuffer[processedLength + i];
187
+ // }
176
188
189
+ // Vector256<byte> remainingBlock = Vector256.Create(remainingBytes.ToArray());
190
+ // Utf8Validation.utf8_checker.CheckNextInput(remainingBlock, ref prev_input_block, ref prev_incomplete, ref error);
191
+ // processedLength += inputLength - processedLength;
192
+
193
+ // }
194
+
195
+ // CheckForGCCollections("After processed remaining bytes");
177
196
197
+ // | Method | FileName | Mean | Error | StdDev | Median | Allocated |
198
+ // |---------------------------- |----------------------- |----------:|----------:|----------:|----------:|----------:|
199
+ // | SIMDUtf8ValidationRealData | data/arabic.utf8.txt | 36.968 us | 0.4637 us | 0.4111 us | 37.018 us | 56 B |
200
+ // | SIMDUtf8ValidationErrorData | data/arabic.utf8.txt | 31.851 us | 0.2252 us | 0.2107 us | 31.865 us | 56 B |
201
+ // | SIMDUtf8ValidationRealData | data/chinese.utf8.txt | 10.541 us | 0.0870 us | 0.0814 us | 10.490 us | 56 B |
202
+ // | SIMDUtf8ValidationErrorData | data/chinese.utf8.txt | 12.404 us | 0.2447 us | 0.2913 us | 12.355 us | 56 B |
203
+ // | SIMDUtf8ValidationRealData | data/english.utf8.txt | 14.297 us | 0.2786 us | 0.4655 us | 14.225 us | 56 B |
204
+ // | SIMDUtf8ValidationErrorData | data/english.utf8.txt | 14.207 us | 0.0272 us | 0.0241 us | 14.211 us | 56 B |
205
+ // | SIMDUtf8ValidationRealData | data/french.utf8.txt | 23.993 us | 0.2287 us | 0.2140 us | 23.879 us | 56 B |
206
+ // | SIMDUtf8ValidationErrorData | data/french.utf8.txt | 26.856 us | 0.5314 us | 1.4367 us | 27.005 us | 56 B |
207
+ // | SIMDUtf8ValidationRealData | data/german.utf8.txt | 8.541 us | 0.1702 us | 0.2090 us | 8.456 us | 56 B |
208
+ // | SIMDUtf8ValidationErrorData | data/german.utf8.txt | 8.728 us | 0.1618 us | 0.3938 us | 8.567 us | 56 B |
209
+ // | SIMDUtf8ValidationRealData | data/japanese.utf8.txt | 11.108 us | 0.1767 us | 0.1653 us | 11.184 us | 56 B |
210
+ // | SIMDUtf8ValidationErrorData | data/japanese.utf8.txt | 9.533 us | 0.1288 us | 0.1205 us | 9.490 us | 56 B |
211
+ // | SIMDUtf8ValidationRealData | data/turkish.utf8.txt | 10.128 us | 0.0544 us | 0.0454 us | 10.126 us | 56 B |
212
+ // | SIMDUtf8ValidationErrorData | data/turkish.utf8.txt | 10.078 us | 0.0499 us | 0.0467 us | 10.079 us | 56 B |
213
+
214
+ // scalar results:
178
215
// if (processedLength < inputLength)
179
216
// {
180
217
// // Directly call the scalar function on the remaining part of the buffer
@@ -190,6 +227,41 @@ public static unsafe class Utf8Utility
190
227
// }
191
228
192
229
230
+ // | Method | FileName | Mean | Error | StdDev | Allocated |
231
+ // |---------------------------- |----------------------- |----------:|----------:|----------:|----------:|
232
+ // | SIMDUtf8ValidationRealData | data/arabic.utf8.txt | 20.136 us | 0.3869 us | 0.5031 us | - |
233
+ // | SIMDUtf8ValidationErrorData | data/arabic.utf8.txt | 19.576 us | 0.2366 us | 0.2098 us | - |
234
+ // | SIMDUtf8ValidationRealData | data/chinese.utf8.txt | 6.207 us | 0.0479 us | 0.0400 us | - |
235
+ // | SIMDUtf8ValidationErrorData | data/chinese.utf8.txt | 6.169 us | 0.0541 us | 0.0506 us | - |
236
+ // | SIMDUtf8ValidationRealData | data/english.utf8.txt | 9.212 us | 0.0121 us | 0.0107 us | - |
237
+ // | SIMDUtf8ValidationErrorData | data/english.utf8.txt | 9.373 us | 0.0250 us | 0.0209 us | - |
238
+ // | SIMDUtf8ValidationRealData | data/french.utf8.txt | 13.726 us | 0.2609 us | 0.2900 us | - |
239
+ // | SIMDUtf8ValidationErrorData | data/french.utf8.txt | 13.948 us | 0.2122 us | 0.1985 us | - |
240
+ // | SIMDUtf8ValidationRealData | data/german.utf8.txt | 4.916 us | 0.0176 us | 0.0147 us | - |
241
+ // | SIMDUtf8ValidationErrorData | data/german.utf8.txt | 4.897 us | 0.0525 us | 0.0491 us | - |
242
+ // | SIMDUtf8ValidationRealData | data/japanese.utf8.txt | 5.526 us | 0.0463 us | 0.0411 us | - |
243
+ // | SIMDUtf8ValidationErrorData | data/japanese.utf8.txt | 5.538 us | 0.0405 us | 0.0379 us | - |
244
+ // | SIMDUtf8ValidationRealData | data/turkish.utf8.txt | 5.838 us | 0.0363 us | 0.0340 us | - |
245
+ // | SIMDUtf8ValidationErrorData | data/turkish.utf8.txt | 5.813 us | 0.0440 us | 0.0412 us | - |
246
+
247
+
248
+ if ( processedLength < inputLength )
249
+ {
250
+
251
+ Span < byte > remainingBytes = stackalloc byte [ 32 ] ;
252
+ for ( int i = 0 ; i < inputLength - processedLength ; i ++ )
253
+ {
254
+ remainingBytes [ i ] = pInputBuffer [ processedLength + i ] ;
255
+ }
256
+
257
+ ReadOnlySpan < Byte > remainingBytesReadOnly = remainingBytes ;
258
+ Vector256 < byte > remainingBlock = Vector256 . Create ( remainingBytesReadOnly ) ;
259
+ Utf8Validation . utf8_checker . CheckNextInput ( remainingBlock , ref prev_input_block , ref prev_incomplete , ref error ) ;
260
+ processedLength += inputLength - processedLength ;
261
+
262
+ }
263
+
264
+
193
265
Utf8Validation . utf8_checker . CheckEof ( ref error , prev_incomplete ) ;
194
266
if ( Utf8Validation . utf8_checker . Errors ( error ) )
195
267
{
0 commit comments