Skip to content

Commit 1d27d6f

Browse files
committed
all tests working need cleanup
1 parent e27c85f commit 1d27d6f

File tree

1 file changed

+59
-7
lines changed

1 file changed

+59
-7
lines changed

src/UTF8.cs

Lines changed: 59 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -76,12 +76,44 @@ static void PrintHexAndBinary(byte[] bytes, int highlightIndex = -1)
7676

7777
static Func<byte, string> byteToBinaryString = b => Convert.ToString(b, 2).PadLeft(8, '0');//for debugging
7878

79+
// prevents double counting in case there is a toolong error on the edge
80+
/// <summary>
/// Maps a UTF-8 byte to a pair of correction values based on which kind of
/// lead byte it is (2-, 3- or 4-byte sequence header).
/// </summary>
/// <param name="headerByte">The byte to classify by its high-order bits.</param>
/// <returns>
/// <c>(1, 0)</c> for a 2-byte lead (110xxxxx), <c>(2, 0)</c> for a 3-byte lead
/// (1110xxxx), <c>(2, 1)</c> for a 4-byte lead (11110xxx), and <c>(0, 0)</c>
/// for anything else (ASCII, continuation bytes, and 0xF8-0xFF).
/// </returns>
/// <remarks>
/// NOTE(review): the values look like the negated UTF-16 / scalar count deltas
/// for one character of that length, used to undo a double count -- confirm
/// against the counting code that produces the tail adjustments.
/// </remarks>
public static (int utfAdjust, int scalarAdjust) GetFinalScalarUtfAdjustments(byte headerByte)
{
    // 110xxxxx: lead byte of a 2-byte sequence.
    bool isTwoByteLead = (headerByte & 0xE0) == 0xC0;
    if (isTwoByteLead)
    {
        return (1, 0);
    }

    // 1110xxxx: lead byte of a 3-byte sequence.
    bool isThreeByteLead = (headerByte & 0xF0) == 0xE0;
    if (isThreeByteLead)
    {
        return (2, 0);
    }

    // 11110xxx: lead byte of a 4-byte sequence; the only case that also
    // carries a non-zero scalar adjustment.
    bool isFourByteLead = (headerByte & 0xF8) == 0xF0;
    if (isFourByteLead)
    {
        return (2, 1);
    }

    // ASCII, continuation byte, or a byte that is not a valid lead: no adjustment.
    return (0, 0);
}
101+
102+
79103
public unsafe static byte* RewindAndValidateWithErrors(int howFarBack, byte* buf, int len,ref int utf16CodeUnitCountAdjustment, ref int scalarCountAdjustment)
80104
{
81105

82106
int extraLen = 0;
83107
bool foundLeadingBytes = false;
84108

109+
// Remember the original buf position before any rewind; it is compared with the first invalid byte pointer below
110+
byte* PinputPlusProcessedlength = buf;
111+
112+
113+
114+
int TooLongErroronEdgeUtfadjust = 0;
115+
int TooLongErroronEdgeScalaradjust = 0;
116+
85117
for (int i = 0; i <= howFarBack; i++)
86118
{
87119
byte candidateByte = buf[0 - i];
@@ -92,6 +124,8 @@ static void PrintHexAndBinary(byte[] bytes, int highlightIndex = -1)
92124
if (foundLeadingBytes)
93125
{
94126

127+
(TooLongErroronEdgeUtfadjust,TooLongErroronEdgeScalaradjust) = GetFinalScalarUtfAdjustments(candidateByte);
128+
95129
buf -= i;
96130
break;
97131
}
@@ -108,6 +142,24 @@ static void PrintHexAndBinary(byte[] bytes, int highlightIndex = -1)
108142
byte* invalidBytePointer = GetPointerToFirstInvalidByteScalar(buf, len + extraLen,out TailUtf16CodeUnitCountAdjustment, out TailScalarCountAdjustment);
109143
// Console.WriteLine($"RewindScalarValidation's function utf16adjust:{TailUtf16CodeUnitCountAdjustment}, scalaradjust:{TailScalarCountAdjustment}");
110144

145+
bool isContinuationByte = (invalidBytePointer[0] & 0xC0) == 0x80;
146+
bool isOneByteAfterProcessedLength = (invalidBytePointer == PinputPlusProcessedlength);
147+
148+
149+
150+
// // Print the byte value at the invalidBytePointer
151+
152+
153+
154+
155+
if (isContinuationByte && isOneByteAfterProcessedLength)
156+
{
157+
158+
utf16CodeUnitCountAdjustment += TooLongErroronEdgeUtfadjust;
159+
scalarCountAdjustment += TooLongErroronEdgeScalaradjust;
160+
161+
}
162+
111163

112164
utf16CodeUnitCountAdjustment += TailUtf16CodeUnitCountAdjustment;
113165
scalarCountAdjustment += TailScalarCountAdjustment;
@@ -302,12 +354,12 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
302354
(adjusttotalbyte, backedupByHowMuch, adjustascii, adjustcont, adjustn4) = adjustmentFactor(pInputBuffer + processedLength);
303355
}
304356

305-
if (TooLongErroronEdge)
306-
{
307-
asciibytes += adjustascii;
308-
contbytes += adjustcont;
309-
n4 += adjustn4;
310-
}
357+
// if (TooLongErroronEdge)
358+
// {
359+
// asciibytes += adjustascii;
360+
// contbytes += adjustcont;
361+
// n4 += adjustn4;
362+
// }
311363

312364
var (utfadjust, scalaradjust) = CalculateN2N3FinalSIMDAdjustments(asciibytes, n4, contbytes, totalbyte + adjusttotalbyte);
313365

@@ -698,7 +750,7 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
698750
if (isContinuationByte && isOneByteAfterProcessedLength)
699751
{
700752

701-
TooLongErroronEdge = true;
753+
// TooLongErroronEdge = true;
702754
}
703755

704756

0 commit comments

Comments
 (0)