Skip to content

Commit 4830d84

Browse files
committed
scalar rewind and validate
1 parent ec3869b commit 4830d84

File tree

1 file changed

+27
-0
lines changed

1 file changed

+27
-0
lines changed

src/UTF8.cs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,33 @@ namespace SimdUnicode
99
public static class UTF8
1010
{
1111

12+
13+
// Translated method.
14+
/// <summary>
/// Steps <paramref name="buf"/> backwards over UTF-8 continuation bytes
/// (bit pattern 10xxxxxx) and then validates from the rewound position,
/// extending the validated length by the number of bytes stepped over.
/// </summary>
/// <param name="start">
/// First byte of the underlying buffer. The rewind never moves
/// <paramref name="buf"/> below this address.
/// </param>
/// <param name="buf">Position inside the buffer at which to begin rewinding.</param>
/// <param name="len">Number of bytes to validate, counted from <paramref name="buf"/>.</param>
/// <returns>
/// The value returned by <see cref="GetPointerToFirstInvalidByte"/> for the
/// rewound pointer and adjusted length (presumably a pointer to the first
/// invalid byte, as the name suggests).
/// </returns>
public unsafe static byte* RewindAndValidateWithErrors(byte* start, byte* buf, int len)
{
    int extraLen = 0;

    // A leading byte cannot be further than 4 bytes away, so the rewind is
    // bounded to five steps. The `buf > start` guard stops the rewind at the
    // first byte of the buffer: without it, a continuation byte at (or just
    // after) `start` would make the loop read and step before the buffer.
    for (int i = 0; i < 5 && buf > start; i++)
    {
        if ((*buf & 0b11000000) != 0b10000000)
        {
            break; // Found a leading byte or ASCII, stop rewinding.
        }

        buf--; // Rewind to the previous byte.
        extraLen++;
    }

    // buf now points at a non-continuation byte, at the start of the buffer,
    // or five bytes before its original position. Validate from there with
    // the length grown by the number of bytes we stepped back over.
    return GetPointerToFirstInvalidByte(buf, len + extraLen);
}
1239
public unsafe static byte* GetPointerToFirstInvalidByte(byte* pInputBuffer, int inputLength)
1340
{
1441

0 commit comments

Comments
 (0)