Skip to content

Commit b81cc48

Browse files
committed
revert back to static
1 parent 257323e commit b81cc48

File tree

1 file changed

+36
-59
lines changed

1 file changed

+36
-59
lines changed

src/UTF8_validation.cs

Lines changed: 36 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -12,24 +12,6 @@
1212
// Vector256 https://learn.microsoft.com/en-us/dotnet/api/system.runtime.intrinsics.vector256-1?view=net-7.0
1313
// I extend it as needed
1414

15-
// non-static benchmarks
16-
// | Method | FileName | Mean | Error | StdDev | Median | Allocated |
17-
// |---------------------------- |----------------------- |-----------:|----------:|----------:|-----------:|----------:|
18-
// | SIMDUtf8ValidationRealData | data/arabic.utf8.txt | 419.461 us | 4.7151 us | 4.4105 us | 420.020 us | - |
19-
// | SIMDUtf8ValidationErrorData | data/arabic.utf8.txt | 268.504 us | 2.5139 us | 2.2285 us | 267.491 us | - |
20-
// | SIMDUtf8ValidationRealData | data/chinese.utf8.txt | 113.877 us | 2.2331 us | 3.4101 us | 113.649 us | - |
21-
// | SIMDUtf8ValidationErrorData | data/chinese.utf8.txt | 16.100 us | 0.3168 us | 0.3648 us | 16.059 us | - |
22-
// | SIMDUtf8ValidationRealData | data/english.utf8.txt | 11.170 us | 0.1277 us | 0.1132 us | 11.130 us | - |
23-
// | SIMDUtf8ValidationErrorData | data/english.utf8.txt | 11.010 us | 0.1023 us | 0.0957 us | 11.007 us | - |
24-
// | SIMDUtf8ValidationRealData | data/french.utf8.txt | 12.987 us | 0.1030 us | 0.0963 us | 12.980 us | - |
25-
// | SIMDUtf8ValidationErrorData | data/french.utf8.txt | 12.786 us | 0.1989 us | 0.1860 us | 12.824 us | - |
26-
// | SIMDUtf8ValidationRealData | data/german.utf8.txt | 100.692 us | 2.0088 us | 5.2921 us | 102.429 us | - |
27-
// | SIMDUtf8ValidationErrorData | data/german.utf8.txt | 33.260 us | 0.4813 us | 0.4502 us | 33.186 us | - |
28-
// | SIMDUtf8ValidationRealData | data/japanese.utf8.txt | 134.439 us | 1.0321 us | 0.9149 us | 134.324 us | - |
29-
// | SIMDUtf8ValidationErrorData | data/japanese.utf8.txt | 65.396 us | 1.2923 us | 1.1456 us | 65.504 us | - |
30-
// | SIMDUtf8ValidationRealData | data/turkish.utf8.txt | 5.519 us | 0.0311 us | 0.0275 us | 5.517 us | - |
31-
// | SIMDUtf8ValidationErrorData | data/turkish.utf8.txt | 5.470 us | 0.0270 us | 0.0253 us | 5.466 us | - |
32-
3315

3416
// | Method | N | Mean | Error | StdDev | Gen0 | Allocated |
3517
// |---------------------------- |----- |-----------:|----------:|----------:|-------:|----------:|
@@ -159,7 +141,7 @@ public static unsafe class Utf8Utility
159141
public static byte* GetPointerToFirstInvalidByte(byte* pInputBuffer, int inputLength)
160142
{
161143

162-
var checker = new Utf8Validation.utf8_checker();
144+
163145

164146
int processedLength = 0;
165147

@@ -171,15 +153,10 @@ public static unsafe class Utf8Utility
171153
while (processedLength + 64 <= inputLength)
172154
{
173155

174-
// SIMDGetPointerToFirstInvalidByte(pInputBuffer,processedLength);
175-
176-
Vector256<byte> currentBlock = Avx.LoadVector256(pInputBuffer + processedLength);
177-
checker.CheckNextInput(currentBlock);
178-
currentBlock = Avx.LoadVector256(pInputBuffer + processedLength);
179-
checker.CheckNextInput(currentBlock);
180-
181-
checker.CheckEof();
182-
if (checker.Errors())
156+
SIMDGetPointerToFirstInvalidByte(pInputBuffer,processedLength);
157+
158+
Utf8Validation.utf8_checker.CheckEof();
159+
if (Utf8Validation.utf8_checker.Errors())
183160
{
184161
// return pInputBuffer + processedLength;
185162
return SimdUnicode.UTF8.RewindAndValidateWithErrors(pInputBuffer + processedLength,inputLength - processedLength);
@@ -243,10 +220,10 @@ public static unsafe class Utf8Utility
243220

244221
ReadOnlySpan<Byte> remainingBytesReadOnly = remainingBytes;
245222
Vector256<byte> remainingBlock = Vector256.Create(remainingBytesReadOnly);
246-
checker.CheckNextInput(remainingBlock);
223+
Utf8Validation.utf8_checker.CheckNextInput(remainingBlock);
247224

248-
checker.CheckEof();
249-
if (checker.Errors())
225+
Utf8Validation.utf8_checker.CheckEof();
226+
if (Utf8Validation.utf8_checker.Errors())
250227
{
251228
// return pInputBuffer + processedLength;
252229
return SimdUnicode.UTF8.GetPointerToFirstInvalidByte(pInputBuffer + processedLength,inputLength - processedLength);
@@ -264,28 +241,28 @@ public static unsafe class Utf8Utility
264241

265242
// Returns a pointer to the first invalid byte in the input buffer if it's invalid, or a pointer to the end if it's valid.
266243
// [MethodImpl(MethodImplOptions.AggressiveInlining)]
267-
// public static byte* SIMDGetPointerToFirstInvalidByte(byte* pInputBuffer, int processedLength)
268-
// {
269-
// ////////////////
270-
// // TODO: I recommend taking this code and calling it something
271-
// // else. Then have the current function (GetPointerToFirstInvalidByte)
272-
// // call the SIMD function only if inputLength is sufficiently large (maybe 64 bytes),
273-
// // otherwise, use the scalar function.
274-
// ////////////////
244+
public static byte* SIMDGetPointerToFirstInvalidByte(byte* pInputBuffer, int processedLength)
245+
{
246+
////////////////
247+
// TODO: I recommend taking this code and calling it something
248+
// else. Then have the current function (GetPointerToFirstInvalidByte)
249+
// call the SIMD function only if inputLength is sufficiently large (maybe 64 bytes),
250+
// otherwise, use the scalar function.
251+
////////////////
275252

276253

277254

278-
// Vector256<byte> currentBlock = Avx.LoadVector256(pInputBuffer + processedLength);
279-
// checker.CheckNextInput(currentBlock);
255+
Vector256<byte> currentBlock = Avx.LoadVector256(pInputBuffer + processedLength);
256+
Utf8Validation.utf8_checker.CheckNextInput(currentBlock);
280257

281-
// processedLength += 32;
258+
processedLength += 32;
282259

283-
// currentBlock = Avx.LoadVector256(pInputBuffer + processedLength);
284-
// checker.CheckNextInput(currentBlock);
285-
// processedLength += 32;
260+
currentBlock = Avx.LoadVector256(pInputBuffer + processedLength);
261+
Utf8Validation.utf8_checker.CheckNextInput(currentBlock);
262+
processedLength += 32;
286263

287-
// return pInputBuffer + processedLength;
288-
// }
264+
return pInputBuffer + processedLength;
265+
}
289266
}
290267

291268
// C# docs suggest that classes are allocated on the heap:
@@ -296,9 +273,9 @@ public struct utf8_checker
296273
{
297274

298275

299-
Vector256<byte> error = Vector256<byte>.Zero;
300-
Vector256<byte> prev_input_block = Vector256<byte>.Zero;
301-
Vector256<byte> prev_incomplete = Vector256<byte>.Zero;
276+
static Vector256<byte> error = Vector256<byte>.Zero;
277+
static Vector256<byte> prev_input_block = Vector256<byte>.Zero;
278+
static Vector256<byte> prev_incomplete = Vector256<byte>.Zero;
302279

303280
// Explicit constructor
304281
public utf8_checker()
@@ -315,7 +292,7 @@ public utf8_checker()
315292
// This is the simplest, least time-consuming implementation.
316293
[MethodImpl(MethodImplOptions.AggressiveInlining)]
317294

318-
public void CheckNextInput(Vector256<byte> input)
295+
public static void CheckNextInput(Vector256<byte> input)
319296
{
320297
// Compiles to:
321298
/*
@@ -381,7 +358,7 @@ je G_M000_IG04
381358

382359
[MethodImpl(MethodImplOptions.AggressiveInlining)]
383360

384-
public void CheckUtf8Bytes(Vector256<byte> input)
361+
public static void CheckUtf8Bytes(Vector256<byte> input)
385362
{
386363
// compiles to
387364
// vmovups ymm0, ymmword ptr [rcx]
@@ -422,7 +399,7 @@ public void CheckUtf8Bytes(Vector256<byte> input)
422399

423400
// [MethodImpl(MethodImplOptions.AggressiveInlining)]
424401

425-
public bool Errors()
402+
public static bool Errors()
426403
{
427404
// Console.WriteLine("Error Vector at the end: " + VectorToString(error));
428405
// compiles to:
@@ -434,7 +411,7 @@ public bool Errors()
434411

435412
// [MethodImpl(MethodImplOptions.AggressiveInlining)]
436413

437-
public void CheckEof()
414+
public static void CheckEof()
438415
{
439416
// Console.WriteLine("Error Vector before check_eof(): " + VectorToString(error));
440417
// Console.WriteLine("prev_incomplete Vector in check_eof(): " + VectorToString(prev_incomplete));
@@ -460,7 +437,7 @@ public void CheckEof()
460437
[MethodImpl(MethodImplOptions.AggressiveInlining)]
461438

462439
// This corresponds to section 6.1 (e.g. Table 6) of the paper, e.g. 1-2 bytes
463-
private Vector256<byte> CheckSpecialCases(Vector256<byte> input, Vector256<byte> prev1)
440+
private static Vector256<byte> CheckSpecialCases(Vector256<byte> input, Vector256<byte> prev1)
464441
{
465442

466443
// define bits that indicate error code
@@ -556,7 +533,7 @@ private Vector256<byte> CheckSpecialCases(Vector256<byte> input, Vector256<byte>
556533
}
557534

558535
[MethodImpl(MethodImplOptions.AggressiveInlining)]
559-
private Vector256<byte> CheckMultibyteLengths(Vector256<byte> input, Vector256<byte> prev_input, Vector256<byte> sc)
536+
private static Vector256<byte> CheckMultibyteLengths(Vector256<byte> input, Vector256<byte> prev_input, Vector256<byte> sc)
560537
{
561538
// Console.WriteLine("sc: " + VectorToString(sc));
562539

@@ -587,7 +564,7 @@ private Vector256<byte> CheckMultibyteLengths(Vector256<byte> input, Vector256<b
587564
}
588565

589566
[MethodImpl(MethodImplOptions.AggressiveInlining)]
590-
private Vector256<byte> MustBe23Continuation(Vector256<byte> prev2, Vector256<byte> prev3)
567+
private static Vector256<byte> MustBe23Continuation(Vector256<byte> prev2, Vector256<byte> prev3)
591568
{
592569
// Compiles to
593570
// vmovups ymm0, ymmword ptr [rdx]
@@ -621,7 +598,7 @@ private Vector256<byte> MustBe23Continuation(Vector256<byte> prev2, Vector256<by
621598

622599
[MethodImpl(MethodImplOptions.AggressiveInlining)]
623600

624-
private Vector256<byte> IsIncomplete(Vector256<byte> input)
601+
private static Vector256<byte> IsIncomplete(Vector256<byte> input)
625602
{
626603
// Console.WriteLine("Input Vector is_incomplete: " + VectorToString(input));
627604
// byte[] maxArray = new byte[32]
@@ -647,7 +624,7 @@ private Vector256<byte> IsIncomplete(Vector256<byte> input)
647624

648625
[MethodImpl(MethodImplOptions.AggressiveInlining)]
649626

650-
private Vector256<byte> SaturatingSubtractUnsigned(Vector256<byte> left, Vector256<byte> right)
627+
private static Vector256<byte> SaturatingSubtractUnsigned(Vector256<byte> left, Vector256<byte> right)
651628
{
652629
// Compiles to
653630
// vpsubusw ymm0, ymm0, ymmword ptr [r8]

0 commit comments

Comments
 (0)