Skip to content

Commit 5d31256

Browse files
committed
non-static test
1 parent 7fb9b2b commit 5d31256

File tree

1 file changed

+41
-36
lines changed

1 file changed

+41
-36
lines changed

src/UTF8_validation.cs

Lines changed: 41 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ public static unsafe class Utf8Utility
141141
public static byte* GetPointerToFirstInvalidByte(byte* pInputBuffer, int inputLength)
142142
{
143143

144-
144+
var checker = new Utf8Validation.utf8_checker();
145145

146146
int processedLength = 0;
147147

@@ -153,10 +153,15 @@ public static unsafe class Utf8Utility
153153
while (processedLength + 64 <= inputLength)
154154
{
155155

156-
SIMDGetPointerToFirstInvalidByte(pInputBuffer,processedLength);
157-
158-
Utf8Validation.utf8_checker.CheckEof();
159-
if (Utf8Validation.utf8_checker.Errors())
156+
// SIMDGetPointerToFirstInvalidByte(pInputBuffer,processedLength);
157+
158+
Vector256<byte> currentBlock = Avx.LoadVector256(pInputBuffer + processedLength);
159+
checker.CheckNextInput(currentBlock);
160+
// Second half of the 64-byte chunk: offset by 32 so this load covers bytes 32..63
// instead of re-reading the first block (the loop guard ensures 64 bytes are available).
currentBlock = Avx.LoadVector256(pInputBuffer + processedLength + 32);
161+
checker.CheckNextInput(currentBlock);
162+
163+
checker.CheckEof();
164+
if (checker.Errors())
160165
{
161166
// return pInputBuffer + processedLength;
162167
return SimdUnicode.UTF8.RewindAndValidateWithErrors(pInputBuffer + processedLength,inputLength - processedLength);
@@ -220,10 +225,10 @@ public static unsafe class Utf8Utility
220225

221226
ReadOnlySpan<Byte> remainingBytesReadOnly = remainingBytes;
222227
Vector256<byte> remainingBlock = Vector256.Create(remainingBytesReadOnly);
223-
Utf8Validation.utf8_checker.CheckNextInput(remainingBlock);
228+
checker.CheckNextInput(remainingBlock);
224229

225-
Utf8Validation.utf8_checker.CheckEof();
226-
if (Utf8Validation.utf8_checker.Errors())
230+
checker.CheckEof();
231+
if (checker.Errors())
227232
{
228233
// return pInputBuffer + processedLength;
229234
return SimdUnicode.UTF8.GetPointerToFirstInvalidByte(pInputBuffer + processedLength,inputLength - processedLength);
@@ -241,28 +246,28 @@ public static unsafe class Utf8Utility
241246

242247
// Returns a pointer to the first invalid byte in the input buffer if it's invalid, or a pointer to the end if it's valid.
243248
// [MethodImpl(MethodImplOptions.AggressiveInlining)]
244-
public static byte* SIMDGetPointerToFirstInvalidByte(byte* pInputBuffer, int processedLength)
245-
{
246-
////////////////
247-
// TODO: I recommend taking this code and calling it something
248-
// else. Then have the current function (GetPointerToFirstInvalidByte)
249-
// call the SIMD function only if inputLength is sufficiently large (maybe 64 bytes),
250-
// otherwise, use the scalar function.
251-
////////////////
249+
// public static byte* SIMDGetPointerToFirstInvalidByte(byte* pInputBuffer, int processedLength)
250+
// {
251+
// ////////////////
252+
// // TODO: I recommend taking this code and calling it something
253+
// // else. Then have the current function (GetPointerToFirstInvalidByte)
254+
// // call the SIMD function only if inputLength is sufficiently large (maybe 64 bytes),
255+
// // otherwise, use the scalar function.
256+
// ////////////////
252257

253258

254259

255-
Vector256<byte> currentBlock = Avx.LoadVector256(pInputBuffer + processedLength);
256-
Utf8Validation.utf8_checker.CheckNextInput(currentBlock);
260+
// Vector256<byte> currentBlock = Avx.LoadVector256(pInputBuffer + processedLength);
261+
// checker.CheckNextInput(currentBlock);
257262

258-
processedLength += 32;
263+
// processedLength += 32;
259264

260-
currentBlock = Avx.LoadVector256(pInputBuffer + processedLength);
261-
Utf8Validation.utf8_checker.CheckNextInput(currentBlock);
262-
processedLength += 32;
265+
// currentBlock = Avx.LoadVector256(pInputBuffer + processedLength);
266+
// checker.CheckNextInput(currentBlock);
267+
// processedLength += 32;
263268

264-
return pInputBuffer + processedLength;
265-
}
269+
// return pInputBuffer + processedLength;
270+
// }
266271
}
267272

268273
// C# docs suggest that classes are allocated on the heap:
@@ -273,9 +278,9 @@ public struct utf8_checker
273278
{
274279

275280

276-
static Vector256<byte> error = Vector256<byte>.Zero;
277-
static Vector256<byte> prev_input_block = Vector256<byte>.Zero;
278-
static Vector256<byte> prev_incomplete = Vector256<byte>.Zero;
281+
Vector256<byte> error = Vector256<byte>.Zero;
282+
Vector256<byte> prev_input_block = Vector256<byte>.Zero;
283+
Vector256<byte> prev_incomplete = Vector256<byte>.Zero;
279284

280285
// Explicit constructor
281286
public utf8_checker()
@@ -292,7 +297,7 @@ public utf8_checker()
292297
// This is the simplest, least time-consuming implementation.
293298
[MethodImpl(MethodImplOptions.AggressiveInlining)]
294299

295-
public static void CheckNextInput(Vector256<byte> input)
300+
public void CheckNextInput(Vector256<byte> input)
296301
{
297302
// Compiles to:
298303
/*
@@ -358,7 +363,7 @@ je G_M000_IG04
358363

359364
[MethodImpl(MethodImplOptions.AggressiveInlining)]
360365

361-
public static void CheckUtf8Bytes(Vector256<byte> input)
366+
public void CheckUtf8Bytes(Vector256<byte> input)
362367
{
363368
// compiles to
364369
// vmovups ymm0, ymmword ptr [rcx]
@@ -399,7 +404,7 @@ public static void CheckUtf8Bytes(Vector256<byte> input)
399404

400405
// [MethodImpl(MethodImplOptions.AggressiveInlining)]
401406

402-
public static bool Errors()
407+
public bool Errors()
403408
{
404409
// Console.WriteLine("Error Vector at the end: " + VectorToString(error));
405410
// compiles to:
@@ -411,7 +416,7 @@ public static bool Errors()
411416

412417
// [MethodImpl(MethodImplOptions.AggressiveInlining)]
413418

414-
public static void CheckEof()
419+
public void CheckEof()
415420
{
416421
// Console.WriteLine("Error Vector before check_eof(): " + VectorToString(error));
417422
// Console.WriteLine("prev_incomplete Vector in check_eof(): " + VectorToString(prev_incomplete));
@@ -437,7 +442,7 @@ public static void CheckEof()
437442
[MethodImpl(MethodImplOptions.AggressiveInlining)]
438443

439444
// This corresponds to section 6.1 of the paper (see e.g. Table 6), e.g. 1-2 bytes
440-
private static Vector256<byte> CheckSpecialCases(Vector256<byte> input, Vector256<byte> prev1)
445+
private Vector256<byte> CheckSpecialCases(Vector256<byte> input, Vector256<byte> prev1)
441446
{
442447

443448
// define bits that indicate error code
@@ -533,7 +538,7 @@ private static Vector256<byte> CheckSpecialCases(Vector256<byte> input, Vector25
533538
}
534539

535540
[MethodImpl(MethodImplOptions.AggressiveInlining)]
536-
private static Vector256<byte> CheckMultibyteLengths(Vector256<byte> input, Vector256<byte> prev_input, Vector256<byte> sc)
541+
private Vector256<byte> CheckMultibyteLengths(Vector256<byte> input, Vector256<byte> prev_input, Vector256<byte> sc)
537542
{
538543
// Console.WriteLine("sc: " + VectorToString(sc));
539544

@@ -564,7 +569,7 @@ private static Vector256<byte> CheckMultibyteLengths(Vector256<byte> input, Vect
564569
}
565570

566571
[MethodImpl(MethodImplOptions.AggressiveInlining)]
567-
private static Vector256<byte> MustBe23Continuation(Vector256<byte> prev2, Vector256<byte> prev3)
572+
private Vector256<byte> MustBe23Continuation(Vector256<byte> prev2, Vector256<byte> prev3)
568573
{
569574
// Compiles to
570575
// vmovups ymm0, ymmword ptr [rdx]
@@ -598,7 +603,7 @@ private static Vector256<byte> MustBe23Continuation(Vector256<byte> prev2, Vecto
598603

599604
[MethodImpl(MethodImplOptions.AggressiveInlining)]
600605

601-
private static Vector256<byte> IsIncomplete(Vector256<byte> input)
606+
private Vector256<byte> IsIncomplete(Vector256<byte> input)
602607
{
603608
// Console.WriteLine("Input Vector is_incomplete: " + VectorToString(input));
604609
// byte[] maxArray = new byte[32]
@@ -624,7 +629,7 @@ private static Vector256<byte> IsIncomplete(Vector256<byte> input)
624629

625630
[MethodImpl(MethodImplOptions.AggressiveInlining)]
626631

627-
private static Vector256<byte> SaturatingSubtractUnsigned(Vector256<byte> left, Vector256<byte> right)
632+
private Vector256<byte> SaturatingSubtractUnsigned(Vector256<byte> left, Vector256<byte> right)
628633
{
629634
// Compiles to
630635
// vpsubusw ymm0, ymm0, ymmword ptr [r8]

0 commit comments

Comments
 (0)