Skip to content

Commit e144a1b

Browse files
committed
fix scalar version
1 parent 4830d84 commit e144a1b

File tree

2 files changed

+50
-23
lines changed

2 files changed

+50
-23
lines changed

src/UTF8.cs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ public static class UTF8
1111

1212

1313
// Translated method.
14-
public unsafe static byte* RewindAndValidateWithErrors(byte* start, byte* buf, int len)
14+
public unsafe static byte* RewindAndValidateWithErrors(byte* buf, int len)
1515
{
1616
int extraLen = 0;
1717
// A leading byte cannot be further than 4 bytes away.

src/UTF8_validation.cs

Lines changed: 49 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -212,19 +212,16 @@ public static unsafe class Utf8Utility
212212
// | SIMDUtf8ValidationErrorData | data/turkish.utf8.txt | 10.078 us | 0.0499 us | 0.0467 us | 10.079 us | 56 B |
213213

214214
// scalar results:
215-
// if (processedLength < inputLength)
216-
// {
217-
// // Directly call the scalar function on the remaining part of the buffer
218-
// byte* invalidBytePointer = GetPointerToFirstInvalidByte(pInputBuffer + processedLength, inputLength - processedLength -1);
219-
220-
// // You can then use `invalidBytePointer` as needed, for example:
221-
// // if (invalidBytePointer != pInputBuffer + inputLength) {
222-
// // // Handle the case where an invalid byte is found
223-
// // }
224-
225-
// // Update processedLength to reflect the processing done by the scalar function
226-
// processedLength += (int)(invalidBytePointer - pInputBuffer);
227-
// }
215+
if (processedLength < inputLength)
216+
{
217+
byte* invalidBytePointer = UTF8.RewindAndValidateWithErrors(pInputBuffer + processedLength, inputLength - processedLength);
218+
if (invalidBytePointer != pInputBuffer + inputLength)
219+
{
220+
// An invalid byte was found. Adjust error handling as needed.
221+
error = Vector256.Create((byte)1);
222+
}
223+
processedLength += (int)(invalidBytePointer - (pInputBuffer + processedLength));
224+
}
228225

229226

230227
// | Method | FileName | Mean | Error | StdDev | Allocated |
@@ -280,18 +277,48 @@ public static unsafe class Utf8Utility
280277
// | SIMDUtf8ValidationErrorData | data/japanese.utf8.txt | 10.929 us | 0.2096 us | 0.1961 us | - |
281278
// | SIMDUtf8ValidationRealData | data/turkish.utf8.txt | 10.493 us | 0.2098 us | 0.5708 us | - |
282279
// | SIMDUtf8ValidationErrorData | data/turkish.utf8.txt | 9.575 us | 0.1878 us | 0.1757 us | - |
283-
if (processedLength < inputLength)
284-
{
280+
// if (processedLength < inputLength)
281+
// {
285282

286-
Span<byte> remainingBytes = stackalloc byte[32];
287-
new Span<byte>(pInputBuffer + processedLength, inputLength - processedLength).CopyTo(remainingBytes);
283+
// Span<byte> remainingBytes = stackalloc byte[32];
284+
// new Span<byte>(pInputBuffer + processedLength, inputLength - processedLength).CopyTo(remainingBytes);
288285

289-
ReadOnlySpan<Byte> remainingBytesReadOnly = remainingBytes;
290-
Vector256<byte> remainingBlock = Vector256.Create(remainingBytesReadOnly);
291-
Utf8Validation.utf8_checker.CheckNextInput(remainingBlock, ref prev_input_block, ref prev_incomplete, ref error);
292-
processedLength += inputLength - processedLength;
286+
// ReadOnlySpan<Byte> remainingBytesReadOnly = remainingBytes;
287+
// Vector256<byte> remainingBlock = Vector256.Create(remainingBytesReadOnly);
288+
// Utf8Validation.utf8_checker.CheckNextInput(remainingBlock, ref prev_input_block, ref prev_incomplete, ref error);
289+
// processedLength += inputLength - processedLength;
290+
291+
// }
292+
293+
// if (processedLength < inputLength)
294+
// {
295+
296+
// Span<byte> remainingBytes = stackalloc byte[32];
297+
// new Span<byte>(pInputBuffer + processedLength, inputLength - processedLength).CopyTo(remainingBytes);
298+
299+
// ReadOnlySpan<Byte> remainingBytesReadOnly = remainingBytes;
300+
// Vector256<byte> remainingBlock = Vector256.Create(remainingBytesReadOnly);
301+
// Utf8Validation.utf8_checker.CheckNextInput(remainingBlock, ref prev_input_block, ref prev_incomplete, ref error);
302+
// processedLength += inputLength - processedLength;
303+
304+
// }
305+
306+
// if (processedLength < inputLength)
307+
// {
308+
// // Directly call the scalar function on the remaining part of the buffer
309+
// byte* startOfRemaining = pInputBuffer + processedLength;
310+
// int lengthOfRemaining = inputLength - processedLength;
311+
// byte* invalidBytePointer = UTF8.GetPointerToFirstInvalidByte(startOfRemaining, lengthOfRemaining);
312+
313+
// // Use `invalidBytePointer` as needed, for example:
314+
// // if (invalidBytePointer != startOfRemaining + lengthOfRemaining) {
315+
// // // Handle the case where an invalid byte is found
316+
// // }
317+
318+
// // Update processedLength based on the result of the scalar function
319+
// processedLength += (int)(invalidBytePointer - pInputBuffer);
320+
// }
293321

294-
}
295322

296323

297324
Utf8Validation.utf8_checker.CheckEof(ref error, prev_incomplete);

0 commit comments

Comments
 (0)