Skip to content

Commit 0c20b54

Browse files
committed
SAVE GAME
1 parent f2caccf commit 0c20b54

File tree

2 files changed

+275
-4
lines changed

2 files changed

+275
-4
lines changed

src/UTF8_validation.cs

Lines changed: 66 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,25 @@ public static unsafe class Utf8Utility
119119
// return string.Join(" ", binaryStrings);
120120
// }
121121

122+
123+
124+
// par:
125+
// | Method | FileName | Mean | Error | StdDev | Allocated |
126+
// |----------------------------------- |----------------------- |-----------:|----------:|----------:|----------:|
127+
// | CompetitionUtf8ValidationRealData | data/arabic.utf8.txt | 199.315 us | 0.2632 us | 0.2334 us | - |
128+
// | CompetitionUtf8ValidationErrorData | data/arabic.utf8.txt | 132.782 us | 0.5135 us | 0.4552 us | - |
129+
// | CompetitionUtf8ValidationRealData | data/chinese.utf8.txt | 29.674 us | 0.3246 us | 0.2710 us | - |
130+
// | CompetitionUtf8ValidationErrorData | data/chinese.utf8.txt | 5.185 us | 0.0177 us | 0.0148 us | - |
131+
// | CompetitionUtf8ValidationRealData | data/english.utf8.txt | 16.251 us | 0.2844 us | 0.2793 us | - |
132+
// | CompetitionUtf8ValidationErrorData | data/english.utf8.txt | 11.119 us | 0.0405 us | 0.0379 us | - |
133+
// | CompetitionUtf8ValidationRealData | data/french.utf8.txt | 70.772 us | 0.2132 us | 0.1890 us | - |
134+
// | CompetitionUtf8ValidationErrorData | data/french.utf8.txt | 22.515 us | 0.1278 us | 0.1195 us | - |
135+
// | CompetitionUtf8ValidationRealData | data/german.utf8.txt | 14.132 us | 0.0722 us | 0.0640 us | - |
136+
// | CompetitionUtf8ValidationErrorData | data/german.utf8.txt | 6.889 us | 0.0231 us | 0.0205 us | - |
137+
// | CompetitionUtf8ValidationRealData | data/japanese.utf8.txt | 25.023 us | 0.1017 us | 0.0952 us | - |
138+
// | CompetitionUtf8ValidationErrorData | data/japanese.utf8.txt | 17.504 us | 0.0712 us | 0.0666 us | - |
139+
// | CompetitionUtf8ValidationRealData | data/turkish.utf8.txt | 23.755 us | 0.3332 us | 0.3117 us | - |
140+
// | CompetitionUtf8ValidationErrorData | data/turkish.utf8.txt | 21.983 us | 0.1308 us | 0.1223 us | - |
122141
public static byte* GetPointerToFirstInvalidByte(byte* pInputBuffer, int inputLength)
123142
{
124143

@@ -146,16 +165,59 @@ public static unsafe class Utf8Utility
146165

147166
}
148167

168+
// First fix benchmarks (static UTF-8 checker):
169+
// | Method | FileName | Mean | Error | StdDev | Allocated |
170+
// |---------------------------- |----------------------- |-----------:|----------:|-----------:|----------:|
171+
// | SIMDUtf8ValidationRealData | data/arabic.utf8.txt | 478.655 us | 8.9312 us | 15.4059 us | - |
172+
// | SIMDUtf8ValidationErrorData | data/arabic.utf8.txt | 283.895 us | 5.2810 us | 8.9675 us | - |
173+
// | SIMDUtf8ValidationRealData | data/chinese.utf8.txt | 134.967 us | 2.6698 us | 5.1438 us | - |
174+
// | SIMDUtf8ValidationErrorData | data/chinese.utf8.txt | 17.403 us | 0.3361 us | 0.4820 us | - |
175+
// | SIMDUtf8ValidationRealData | data/english.utf8.txt | 11.186 us | 0.0707 us | 0.0626 us | - |
176+
// | SIMDUtf8ValidationErrorData | data/english.utf8.txt | 11.167 us | 0.1118 us | 0.0991 us | - |
177+
// | SIMDUtf8ValidationRealData | data/french.utf8.txt | 13.303 us | 0.2523 us | 0.2236 us | - |
178+
// | SIMDUtf8ValidationErrorData | data/french.utf8.txt | 13.002 us | 0.1448 us | 0.1284 us | - |
179+
// | SIMDUtf8ValidationRealData | data/german.utf8.txt | 5.965 us | 0.1016 us | 0.0901 us | - |
180+
// | SIMDUtf8ValidationErrorData | data/german.utf8.txt | 5.981 us | 0.0683 us | 0.0639 us | - |
181+
// | SIMDUtf8ValidationRealData | data/japanese.utf8.txt | 138.114 us | 2.6217 us | 3.0191 us | - |
182+
// | SIMDUtf8ValidationErrorData | data/japanese.utf8.txt | 66.023 us | 1.2819 us | 1.1364 us | - |
183+
// | SIMDUtf8ValidationRealData | data/turkish.utf8.txt | 168.166 us | 2.4131 us | 2.2572 us | - |
184+
// | SIMDUtf8ValidationErrorData | data/turkish.utf8.txt | 112.761 us | 2.2175 us | 1.9657 us | - |
185+
186+
187+
149188
// Process the remaining bytes with the scalar function
189+
// if (processedLength < inputLength)
190+
// {
191+
// byte* invalidBytePointer = SimdUnicode.UTF8.GetPointerToFirstInvalidByte(pInputBuffer + processedLength, inputLength - processedLength);
192+
// if (invalidBytePointer != pInputBuffer + inputLength)
193+
// {
194+
// // An invalid byte was found by the scalar function
195+
// return invalidBytePointer;
196+
// }
197+
// }
150198
if (processedLength < inputLength)
151199
{
152-
byte* invalidBytePointer = SimdUnicode.UTF8.GetPointerToFirstInvalidByte(pInputBuffer + processedLength, inputLength - processedLength);
153-
if (invalidBytePointer != pInputBuffer + inputLength)
200+
201+
Span<byte> remainingBytes = stackalloc byte[32];
202+
new Span<byte>(pInputBuffer + processedLength, inputLength - processedLength).CopyTo(remainingBytes);
203+
204+
ReadOnlySpan<Byte> remainingBytesReadOnly = remainingBytes;
205+
Vector256<byte> remainingBlock = Vector256.Create(remainingBytesReadOnly);
206+
Utf8Validation.utf8_checker.CheckNextInput(remainingBlock);
207+
208+
Utf8Validation.utf8_checker.CheckEof();
209+
if (Utf8Validation.utf8_checker.Errors())
154210
{
155-
// An invalid byte was found by the scalar function
156-
return invalidBytePointer;
211+
// return pInputBuffer + processedLength;
212+
return SimdUnicode.UTF8.GetPointerToFirstInvalidByte(pInputBuffer + processedLength,inputLength - processedLength);
157213
}
214+
processedLength += inputLength - processedLength;
215+
158216
}
217+
218+
219+
220+
159221
return pInputBuffer + inputLength;
160222

161223
}

0 commit comments

Comments
 (0)