@@ -25,48 +25,6 @@ public static class UTF8
25
25
bool foundLeadingBytes = false ;
26
26
// Console.WriteLine(prevWasSimd);
27
27
28
- // adjust for filling in gap
29
- // If an error is found, since we start counting tho adjustments on prev3, a gap is left that needs to be counted in case the previous operation was using SIMD
30
- if ( prevWasSimd )
31
- {
32
- // Console.WriteLine("Triggering Negative adjustment!");
33
- // for (int i = 0; i <= 3; i++)
34
- // {
35
- // if (i == 0){continue;}; // we dont want to dbouble count current byte
36
- // byte candidateByte = buf[0 - i];
37
- // foundLeadingBytes = (candidateByte & 0b11000000) != 0b10000000;
38
- // // if (i==0 & foundLeadingBytes){break;};// We dont want to
39
- // // TODO: written like this for readability, I know its ugly so this needs to be rewritten
40
-
41
- // if (foundLeadingBytes)
42
- // {
43
-
44
- // Console.WriteLine("Negative adjstment:Found leading byte at:" + i + ",Byte:" + candidateByte.ToString("X2"));
45
- // // Console.WriteLine("Found leading byte at:" + i + ",Byte:" + Convert.ToString(candidateByte, 2).PadLeft(8, '0'));
46
-
47
- // // adjustment to avoid double counting
48
- // if ((candidateByte & 0b11100000) == 0b11000000) // Start of a 2-byte sequence
49
- // {
50
- // // Console.WriteLine("Found 2 byte");
51
- // TempUtf16CodeUnitCountAdjustment -= 1;
52
- // }
53
- // if ((candidateByte & 0b11110000) == 0b11100000) // Start of a 3-byte sequence
54
- // {
55
- // // Console.WriteLine("Found 3 byte");
56
- // TempUtf16CodeUnitCountAdjustment -= 2;
57
- // }
58
- // if ((candidateByte & 0b11111000) == 0b11110000) // Start of a 4-byte sequence
59
- // {
60
- // // Console.WriteLine("Found 4 byte");
61
- // TempUtf16CodeUnitCountAdjustment -= 2;
62
- // TempScalarCountAdjustment -= 1;
63
- // }
64
- // // break;
65
- // }
66
- // }
67
- }
68
-
69
-
70
28
// for (int i = 0; i <= howFarBack; i++)
71
29
// {
72
30
// if (i==0){continue;};// we don't want to miss out on counting the current byte, only to avoid double counting what may have been counted prior
@@ -102,13 +60,13 @@ public static class UTF8
102
60
103
61
for ( int i = 0 ; i <= howFarBack ; i ++ )
104
62
{
105
- Console . WriteLine ( "backup stat :" + i ) ;
63
+ Console . WriteLine ( "Activiting main backup :" + i ) ;
106
64
byte candidateByte = buf [ 0 - i ] ;
107
65
foundLeadingBytes = ( candidateByte & 0b11000000 ) != 0b10000000 ;
108
66
if ( foundLeadingBytes )
109
67
{
110
68
buf -= i ;
111
- extraLen = i ;
69
+ extraLen = i ; // a measure of how far we've backed up
112
70
Console . WriteLine ( howFarBack ) ;
113
71
Console . WriteLine ( "Found leading byte at:" + i + ",Byte:" + Convert . ToString ( candidateByte , 2 ) . PadLeft ( 8 , '0' ) ) ;
114
72
@@ -117,6 +75,49 @@ public static class UTF8
117
75
}
118
76
}
119
77
78
+ // adjust for filling in gap
79
+ // If an error is found, since we start counting the adjustments on prev3, a gap is left that needs to be counted in case the previous operation was using SIMD
80
+ // if (prevWasSimd)
81
+ // {
82
+ // Console.WriteLine("Triggering Negative adjustment!");
83
+ // for (int i = extraLen + 1; i <= extraLen + 3; i++)
84
+ // {
85
+ // // if (i == 0){continue;}; // we dont want to double count current byte
86
+ // byte candidateByte = buf[0 - i];
87
+ // foundLeadingBytes = (candidateByte & 0b11000000) != 0b10000000;
88
+ // // Console.WriteLine("Exmining byte...:" + candidateByte.ToString("X2"));
89
+
90
+ // // if (i==0 & foundLeadingBytes){break;};// We dont want to
91
+ // // TODO: written like this for readability, I know its ugly so this needs to be rewritten
92
+
93
+ // if (foundLeadingBytes)
94
+ // {
95
+
96
+ // Console.WriteLine("Negative adjstment:Found leading byte at:" + i + ",Byte:" + candidateByte.ToString("X2"));
97
+ // // Console.WriteLine("Found leading byte at:" + i + ",Byte:" + Convert.ToString(candidateByte, 2).PadLeft(8, '0'));
98
+
99
+ // // adjustment to avoid double counting
100
+ // if ((candidateByte & 0b11100000) == 0b11000000) // Start of a 2-byte sequence
101
+ // {
102
+ // // Console.WriteLine("Found 2 byte");
103
+ // TempUtf16CodeUnitCountAdjustment -= 1;
104
+ // }
105
+ // if ((candidateByte & 0b11110000) == 0b11100000) // Start of a 3-byte sequence
106
+ // {
107
+ // // Console.WriteLine("Found 3 byte");
108
+ // TempUtf16CodeUnitCountAdjustment -= 2;
109
+ // }
110
+ // if ((candidateByte & 0b11111000) == 0b11110000) // Start of a 4-byte sequence
111
+ // {
112
+ // // Console.WriteLine("Found 4 byte");
113
+ // TempUtf16CodeUnitCountAdjustment -= 2;
114
+ // TempScalarCountAdjustment -= 1;
115
+ // }
116
+ // // break;
117
+ // }
118
+ // }
119
+ // }
120
+
120
121
121
122
if ( ! foundLeadingBytes )
122
123
{
@@ -804,14 +805,13 @@ public static class UTF8
804
805
Console . WriteLine ( "-----Error path!!" ) ;
805
806
TailScalarCodeUnitCountAdjustment = 0 ;
806
807
TailUtf16CodeUnitCountAdjustment = 0 ;
807
- int off = 32 ;
808
+ // int off= 32;
808
809
809
- // if (processedLength <32) // not enough bytes to load into SIMD!
810
+ // if (processedLength <32) //
810
811
// {
811
812
// // off = 0;
812
- // prevWasSimd = false; // there was no previous op at all, let alone SIMD one
813
+ // prevWasSimd = false; // not enough bytes to load into SIMD! there was no previous op at all, let alone SIMD one
813
814
// }
814
-
815
815
816
816
// int off = processedLength >= 32 ? processedLength: 0; // we check if there
817
817
// without this there is an overflow if
0 commit comments