Skip to content

Commit 520951f

Browse files
committed
save game
1 parent 73ecbf0 commit 520951f

File tree

2 files changed

+49
-48
lines changed

2 files changed

+49
-48
lines changed

src/UTF8.cs

Lines changed: 48 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -25,48 +25,6 @@ public static class UTF8
2525
bool foundLeadingBytes = false;
2626
// Console.WriteLine(prevWasSimd);
2727

28-
// adjust for filling in gap
29-
// If an error is found, since we start counting tho adjustments on prev3, a gap is left that needs to be counted in case the previous operation was using SIMD
30-
if (prevWasSimd)
31-
{
32-
// Console.WriteLine("Triggering Negative adjustment!");
33-
// for (int i = 0; i <= 3; i++)
34-
// {
35-
// if (i == 0){continue;}; // we dont want to dbouble count current byte
36-
// byte candidateByte = buf[0 - i];
37-
// foundLeadingBytes = (candidateByte & 0b11000000) != 0b10000000;
38-
// // if (i==0 & foundLeadingBytes){break;};// We dont want to
39-
// // TODO: written like this for readability, I know its ugly so this needs to be rewritten
40-
41-
// if (foundLeadingBytes)
42-
// {
43-
44-
// Console.WriteLine("Negative adjstment:Found leading byte at:" + i + ",Byte:" + candidateByte.ToString("X2"));
45-
// // Console.WriteLine("Found leading byte at:" + i + ",Byte:" + Convert.ToString(candidateByte, 2).PadLeft(8, '0'));
46-
47-
// // adjustment to avoid double counting
48-
// if ((candidateByte & 0b11100000) == 0b11000000) // Start of a 2-byte sequence
49-
// {
50-
// // Console.WriteLine("Found 2 byte");
51-
// TempUtf16CodeUnitCountAdjustment -= 1;
52-
// }
53-
// if ((candidateByte & 0b11110000) == 0b11100000) // Start of a 3-byte sequence
54-
// {
55-
// // Console.WriteLine("Found 3 byte");
56-
// TempUtf16CodeUnitCountAdjustment -= 2;
57-
// }
58-
// if ((candidateByte & 0b11111000) == 0b11110000) // Start of a 4-byte sequence
59-
// {
60-
// // Console.WriteLine("Found 4 byte");
61-
// TempUtf16CodeUnitCountAdjustment -= 2;
62-
// TempScalarCountAdjustment -= 1;
63-
// }
64-
// // break;
65-
// }
66-
// }
67-
}
68-
69-
7028
// for (int i = 0; i <= howFarBack; i++)
7129
// {
7230
// if (i==0){continue;};// we dont want to miss out on counting the current byte, only to avoid double counting what may have been counted prior
@@ -102,13 +60,13 @@ public static class UTF8
10260

10361
for (int i = 0; i <= howFarBack; i++)
10462
{
105-
Console.WriteLine("backup stat:" + i);
63+
Console.WriteLine("Activiting main backup:" + i);
10664
byte candidateByte = buf[0 - i];
10765
foundLeadingBytes = (candidateByte & 0b11000000) != 0b10000000;
10866
if (foundLeadingBytes)
10967
{
11068
buf -= i;
111-
extraLen = i;
69+
extraLen = i; // a measure of how far we've backed up
11270
Console.WriteLine(howFarBack);
11371
Console.WriteLine("Found leading byte at:" + i + ",Byte:" + Convert.ToString(candidateByte, 2).PadLeft(8, '0'));
11472

@@ -117,6 +75,49 @@ public static class UTF8
11775
}
11876
}
11977

78+
// adjust for filling in gap
79+
// If an error is found, since we start counting the adjustments on prev3, a gap is left that needs to be counted in case the previous operation was using SIMD
80+
// if (prevWasSimd)
81+
// {
82+
// Console.WriteLine("Triggering Negative adjustment!");
83+
// for (int i = extraLen + 1; i <= extraLen + 3; i++)
84+
// {
85+
// // if (i == 0){continue;}; // we dont want to double count current byte
86+
// byte candidateByte = buf[0 - i];
87+
// foundLeadingBytes = (candidateByte & 0b11000000) != 0b10000000;
88+
// // Console.WriteLine("Exmining byte...:" + candidateByte.ToString("X2"));
89+
90+
// // if (i==0 & foundLeadingBytes){break;};// We dont want to
91+
// // TODO: written like this for readability, I know its ugly so this needs to be rewritten
92+
93+
// if (foundLeadingBytes)
94+
// {
95+
96+
// Console.WriteLine("Negative adjstment:Found leading byte at:" + i + ",Byte:" + candidateByte.ToString("X2"));
97+
// // Console.WriteLine("Found leading byte at:" + i + ",Byte:" + Convert.ToString(candidateByte, 2).PadLeft(8, '0'));
98+
99+
// // adjustment to avoid double counting
100+
// if ((candidateByte & 0b11100000) == 0b11000000) // Start of a 2-byte sequence
101+
// {
102+
// // Console.WriteLine("Found 2 byte");
103+
// TempUtf16CodeUnitCountAdjustment -= 1;
104+
// }
105+
// if ((candidateByte & 0b11110000) == 0b11100000) // Start of a 3-byte sequence
106+
// {
107+
// // Console.WriteLine("Found 3 byte");
108+
// TempUtf16CodeUnitCountAdjustment -= 2;
109+
// }
110+
// if ((candidateByte & 0b11111000) == 0b11110000) // Start of a 4-byte sequence
111+
// {
112+
// // Console.WriteLine("Found 4 byte");
113+
// TempUtf16CodeUnitCountAdjustment -= 2;
114+
// TempScalarCountAdjustment -= 1;
115+
// }
116+
// // break;
117+
// }
118+
// }
119+
// }
120+
120121

121122
if (!foundLeadingBytes)
122123
{
@@ -804,14 +805,13 @@ public static class UTF8
804805
Console.WriteLine("-----Error path!!");
805806
TailScalarCodeUnitCountAdjustment =0;
806807
TailUtf16CodeUnitCountAdjustment =0;
807-
int off= 32;
808+
// int off= 32;
808809

809-
// if (processedLength <32) // not enough bytes to load into SIMD!
810+
// if (processedLength <32) //
810811
// {
811812
// // off = 0;
812-
// prevWasSimd = false; // there was no previous op at all, let alone SIMD one
813+
// prevWasSimd = false; // not enough bytes to load into SIMD! there was no previous op at all, let alone SIMD one
813814
// }
814-
815815

816816
// int off = processedLength >= 32 ? processedLength: 0; // we check if there
817817
// without this there is an overflow if

test/UTF8ValidationTests.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ namespace tests;
1212
// TODO: refine test for unterminated sequence happening at SIMD transition
1313
// TODO: The various tests do not formally take into account the scenario where vector is all ASCII
1414
// TODO?: Test if the error is in the first vector?
15+
// TODO: fix NoError, Incomplete (some of the tests are wrong)
1516

1617
public unsafe class Utf8SIMDValidationTests
1718
{

0 commit comments

Comments
 (0)