Skip to content

Commit c4e4f87

Browse files
committed
tail benchmarks
1 parent b81cc48 commit c4e4f87

File tree

1 file changed

+45
-5
lines changed

1 file changed

+45
-5
lines changed

src/UTF8_validation.cs

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,8 @@ public static unsafe class Utf8Utility
165165

166166
}
167167

168-
// First fix bencrmarks static utf checker
168+
// First fix benchmarks static utf checker
169+
//
169170
// | Method | FileName | Mean | Error | StdDev | Allocated |
170171
// |---------------------------- |----------------------- |-----------:|----------:|-----------:|----------:|
171172
// | SIMDUtf8ValidationRealData | data/arabic.utf8.txt | 478.655 us | 8.9312 us | 15.4059 us | - |
@@ -212,24 +213,63 @@ public static unsafe class Utf8Utility
212213
// | SIMDUtf8ValidationErrorData | data/japanese.utf8.txt | 75.751 us | 0.9603 us | 0.7498 us | - |
213214
// | SIMDUtf8ValidationRealData | data/turkish.utf8.txt | 173.199 us | 3.4289 us | 5.4386 us | - |
214215
// | SIMDUtf8ValidationErrorData | data/turkish.utf8.txt | 112.989 us | 1.7684 us | 1.5677 us | - |
216+
// if (processedLength < inputLength)
217+
// {
218+
219+
// Span<byte> remainingBytes = stackalloc byte[64];
220+
// new Span<byte>(pInputBuffer + processedLength, inputLength - processedLength).CopyTo(remainingBytes);
221+
222+
// ReadOnlySpan<Byte> remainingBytesReadOnly = remainingBytes;
223+
// Vector256<byte> remainingBlock = Vector256.Create(remainingBytesReadOnly);
224+
// Utf8Validation.utf8_checker.CheckNextInput(remainingBlock);
225+
226+
// Utf8Validation.utf8_checker.CheckEof();
227+
// if (Utf8Validation.utf8_checker.Errors())
228+
// {
229+
// // return pInputBuffer + processedLength;
230+
// return SimdUnicode.UTF8.GetPointerToFirstInvalidByte(pInputBuffer + processedLength,inputLength - processedLength);
231+
// }
232+
// processedLength += inputLength - processedLength;
233+
234+
// }
235+
236+
// | Method | FileName | Mean | Error | StdDev | Allocated |
237+
// |---------------------------- |----------------------- |-----------:|----------:|----------:|----------:|
238+
// | SIMDUtf8ValidationRealData | data/arabic.utf8.txt | 454.353 us | 6.0327 us | 5.3478 us | - |
239+
// | SIMDUtf8ValidationErrorData | data/arabic.utf8.txt | 278.734 us | 5.3031 us | 5.8943 us | - |
240+
// | SIMDUtf8ValidationRealData | data/chinese.utf8.txt | 127.542 us | 2.2544 us | 2.1087 us | - |
241+
// | SIMDUtf8ValidationErrorData | data/chinese.utf8.txt | 15.822 us | 0.3030 us | 0.3832 us | - |
242+
// | SIMDUtf8ValidationRealData | data/english.utf8.txt | 11.016 us | 0.1309 us | 0.1225 us | - |
243+
// | SIMDUtf8ValidationErrorData | data/english.utf8.txt | 11.030 us | 0.1580 us | 0.1400 us | - |
244+
// | SIMDUtf8ValidationRealData | data/french.utf8.txt | 12.547 us | 0.0740 us | 0.0656 us | - |
245+
// | SIMDUtf8ValidationErrorData | data/french.utf8.txt | 12.652 us | 0.1455 us | 0.1290 us | - |
246+
// | SIMDUtf8ValidationRealData | data/german.utf8.txt | 5.755 us | 0.0277 us | 0.0246 us | - |
247+
// | SIMDUtf8ValidationErrorData | data/german.utf8.txt | 5.669 us | 0.0079 us | 0.0070 us | - |
248+
// | SIMDUtf8ValidationRealData | data/japanese.utf8.txt | 130.835 us | 0.5999 us | 0.5612 us | - |
249+
// | SIMDUtf8ValidationErrorData | data/japanese.utf8.txt | 71.814 us | 1.0399 us | 0.9727 us | - |
250+
// | SIMDUtf8ValidationRealData | data/turkish.utf8.txt | 167.163 us | 3.1610 us | 4.1103 us | - |
251+
// | SIMDUtf8ValidationErrorData | data/turkish.utf8.txt | 109.607 us | 0.6636 us | 0.5542 us | - |
252+
253+
215254
if (processedLength < inputLength)
216255
{
217256

218257
Span<byte> remainingBytes = stackalloc byte[64];
219-
new Span<byte>(pInputBuffer + processedLength, inputLength - processedLength).CopyTo(remainingBytes);
258+
for (int i = 0; i < inputLength - processedLength; i++)
259+
{
260+
remainingBytes[i] = pInputBuffer[processedLength + i];
261+
}
220262

221263
ReadOnlySpan<Byte> remainingBytesReadOnly = remainingBytes;
222264
Vector256<byte> remainingBlock = Vector256.Create(remainingBytesReadOnly);
223265
Utf8Validation.utf8_checker.CheckNextInput(remainingBlock);
224-
225266
Utf8Validation.utf8_checker.CheckEof();
226267
if (Utf8Validation.utf8_checker.Errors())
227268
{
228269
// return pInputBuffer + processedLength;
229270
return SimdUnicode.UTF8.GetPointerToFirstInvalidByte(pInputBuffer + processedLength,inputLength - processedLength);
230271
}
231272
processedLength += inputLength - processedLength;
232-
233273
}
234274

235275

@@ -239,7 +279,7 @@ public static unsafe class Utf8Utility
239279

240280
}
241281

242-
// Returns a pointer to the first invalid byte in the input buffer if it's invalid, or a pointer to the end if it's valid.
282+
// Returns a pointer to the first invalid byte in the input buffer if it's invalid, or a pointer to the end if it's valid.
243283
// [MethodImpl(MethodImplOptions.AggressiveInlining)]
244284
public static byte* SIMDGetPointerToFirstInvalidByte(byte* pInputBuffer, int processedLength)
245285
{

0 commit comments

Comments
 (0)