Skip to content

Commit be79615

Browse files
committed
some progress
1 parent f27117b commit be79615

File tree

1 file changed

+19
-5
lines changed

1 file changed

+19
-5
lines changed

src/UTF8.cs

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,22 +9,21 @@ namespace SimdUnicode
99
public static class UTF8
1010
{
1111

12-
public unsafe static byte* RewindAndValidateWithErrors(int offset, byte* buf, int len,ref int utf16CodeUnitCountAdjustment, ref int scalarCountAdjustment)
12+
public unsafe static byte* RewindAndValidateWithErrors(int howFarBack, byte* buf, int len,ref int utf16CodeUnitCountAdjustment, ref int scalarCountAdjustment)
1313
{
1414

1515
int TempUtf16CodeUnitCountAdjustment = 0;
1616
int TempScalarCountAdjustment = 0;
1717

18-
int howFarBack = offset;
1918
int extraLen = 0;
2019
bool foundLeadingBytes = false;
21-
for (int i = 0; i <= howFarBack; i++)
20+
21+
for (int i = 0; i < howFarBack; i++)
2222
{
2323
byte candidateByte = buf[0 - i];
2424
foundLeadingBytes = (candidateByte & 0b11000000) != 0b10000000;
2525
if (foundLeadingBytes)
2626
{
27-
2827
// adjustment to avoid double counting
2928
if ((candidateByte & 0b11100000) == 0b11000000) // Start of a 2-byte sequence
3029
{
@@ -39,7 +38,17 @@ public static class UTF8
3938
TempUtf16CodeUnitCountAdjustment += 2;
4039
TempScalarCountAdjustment += 1;
4140
}
42-
41+
break;
42+
}
43+
}
44+
45+
46+
for (int i = 0; i <= howFarBack; i++)
47+
{
48+
byte candidateByte = buf[0 - i];
49+
foundLeadingBytes = (candidateByte & 0b11000000) != 0b10000000;
50+
if (foundLeadingBytes)
51+
{
4352
buf -= i;
4453
extraLen = i;
4554
break;
@@ -65,6 +74,9 @@ public static class UTF8
6574
utf16CodeUnitCountAdjustment += TailUtf16CodeUnitCountAdjustment;
6675
scalarCountAdjustment += TailScalarCountAdjustment;
6776

77+
Console.WriteLine("utf16count after rewint:" + utf16CodeUnitCountAdjustment);
78+
Console.WriteLine("scalarcount after rewint:" + scalarCountAdjustment);
79+
6880
return invalidBytePointer;
6981
}
7082

@@ -651,6 +663,8 @@ public unsafe static void AdjustForSkippedBytes(byte* pInputBuffer,// int skippe
651663

652664
utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment +TailUtf16CodeUnitCountAdjustment;
653665
scalarCountAdjustment = TempScalarCountAdjustment + TailScalarCodeUnitCountAdjustment;
666+
667+
654668

655669
return invalidBytePointer;
656670

0 commit comments

Comments
 (0)