Skip to content

Commit 7b77dc1

Browse files
committed
correct types
1 parent 0949b93 commit 7b77dc1

File tree

1 file changed

+41
-21
lines changed

1 file changed

+41
-21
lines changed

src/UTF8.cs

Lines changed: 41 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,8 @@ public unsafe static (int totalbyteadjustment,int backedupByHowMuch,int ascii,in
225225

226226
public static (int utfadjust, int scalaradjust) CalculateN2N3FinalSIMDAdjustments(int asciibytes, int n4, int contbytes, int totalbyte)
227227
{
228+
229+
Console.WriteLine("CalculateN2N3FinalSIMDAdjustments's input debug. This is ascii count:" + asciibytes + " n4: " + n4 + " contbytes:" + contbytes + " totalbytes:" + totalbyte);
228230
// Calculate n3 based on the provided formula
229231
int n3 = asciibytes - 2 * n4 + 2 * contbytes - totalbyte;
230232

@@ -237,6 +239,9 @@ public static (int utfadjust, int scalaradjust) CalculateN2N3FinalSIMDAdjustment
237239
// Calculate scalaradjust based on n4
238240
int scalaradjust = -n4;
239241

242+
243+
244+
240245
// Return the calculated utfadjust and scalaradjust
241246
return (utfadjust, scalaradjust);
242247
}
@@ -576,9 +581,15 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
576581
// we need to check if the previous block was incomplete.
577582
if (!Avx2.TestZ(prevIncomplete, prevIncomplete))
578583
{
579-
// TODO? : this path iss not explicitly tested
580-
utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment;
581-
scalarCountAdjustment = TempScalarCountAdjustment;
584+
// TODO? : this path is not explicitly tested
585+
Console.WriteLine("---------All ascii need rewind");
586+
587+
588+
int totalbyteasciierror = processedLength - start_point;
589+
var (utfadjustasciierror, scalaradjustasciierror) = CalculateN2N3FinalSIMDAdjustments(asciibytes, n4, contbytes, totalbyteasciierror);
590+
591+
utf16CodeUnitCountAdjustment = utfadjustasciierror;
592+
scalarCountAdjustment = scalaradjustasciierror;
582593

583594
int off = processedLength >= 3 ? processedLength - 3 : processedLength;
584595
return SimdUnicode.UTF8.RewindAndValidateWithErrors(off, pInputBuffer + off, inputLength - off, ref utf16CodeUnitCountAdjustment,ref scalarCountAdjustment);
@@ -608,18 +619,28 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
608619
if (!Avx2.TestZ(error, error))
609620
{
610621
Console.WriteLine("-----Error path!!");
622+
623+
int totalbyteasciierror = processedLength - start_point;
624+
var (utfadjustasciierror, scalaradjustasciierror) = calculateErrorPathadjust(start_point, processedLength, pInputBuffer, asciibytes, n4, contbytes, contbytes);
625+
626+
Console.WriteLine("calculateErrorPathadjust utf16 adjustment:"+ utfadjustasciierror);
627+
Console.WriteLine("calculateErrorPathadjust scalar adjustment:"+ scalaradjustasciierror);
628+
629+
utf16CodeUnitCountAdjustment = utfadjustasciierror;
630+
scalarCountAdjustment = scalaradjustasciierror;
631+
611632
TailScalarCodeUnitCountAdjustment =0;
612633
TailUtf16CodeUnitCountAdjustment =0;
613634

614635
int off = processedLength >= 32 ? processedLength - 32 : processedLength;
615636
byte* invalidBytePointer = SimdUnicode.UTF8.RewindAndValidateWithErrors(off, pInputBuffer + processedLength, inputLength - processedLength, ref TailUtf16CodeUnitCountAdjustment,ref TailScalarCodeUnitCountAdjustment);
616637

617-
utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment +TailUtf16CodeUnitCountAdjustment;
618-
scalarCountAdjustment = TempScalarCountAdjustment + TailScalarCodeUnitCountAdjustment;
638+
utf16CodeUnitCountAdjustment += TailUtf16CodeUnitCountAdjustment;
639+
scalarCountAdjustment += TailScalarCodeUnitCountAdjustment;
619640

620641
// Console.WriteLine("--------"); //debug
621-
// Console.WriteLine("TempUTF16 after error rewind:"+ utf16CodeUnitCountAdjustment);
622-
// Console.WriteLine("TempScalar '' '' '':"+ scalarCountAdjustment);
642+
Console.WriteLine("TempUTF16 after error rewind:"+ utf16CodeUnitCountAdjustment);
643+
Console.WriteLine("TempScalar '' '' '':"+ scalarCountAdjustment);
623644

624645
return invalidBytePointer;
625646
}
@@ -632,35 +653,34 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
632653
{
633654
// We have an unterminated sequence.
634655
Console.WriteLine("---Unterminated seq--- at " + processedLength + "bytes");
635-
// processedLength -= 3;
636-
637-
// Console.WriteLine("incomplete utf16 count", incompleteUtf16CodeUnitPreventDoubleCounting);
638-
// int backedup= 0;
639-
640-
// int currentByte = pInputBuffer[processedLength];
641-
// Console.WriteLine("CurrentByte:" + Convert.ToString(currentByte, 2).PadLeft(8, '0'));
642656

643657
var (totalbyteadjustment, i,tempascii, tempn2, tempn4) = adjustmentFactor(pInputBuffer + processedLength + 32);
644658
processedLength -= i;
645-
// totalbyte -= totalbyteadjustment;
646-
asciibytes +=tempascii;
647659
n4 += tempn4;
648660
contbytes +=tempn2;
649661

650-
// // Console.WriteLine("Backed up " + backedup +" bytes");
651662
// // Console.WriteLine("TempUTF16:"+ TempUtf16CodeUnitCountAdjustment);
652663
// // Console.WriteLine("TempScalar:"+ TempScalarCountAdjustment);
653-
// // Console.WriteLine("-----------------");
654664

655665
}
656666

657-
// We use one instruction (MoveMask) to update ncon, plus one arithmetic operation.
658-
contbytes += Avx2.MoveMask(sc);
667+
// No errors! Updating the variables we keep track of
668+
// We use one instruction (MoveMask) to update ncon, plus one arithmetic operation.
669+
contbytes += (int)Popcnt.PopCount((uint)Avx2.MoveMask(sc));
670+
Console.WriteLine("this is contbytes" + contbytes) ;
659671

660672
// We use two instructions (SubtractSaturate and MoveMask) to update n4, with one arithmetic operation.
661-
n4 += Avx2.MoveMask(Avx2.SubtractSaturate(currentBlock, fourthByte));
673+
n4 += (int)Popcnt.PopCount((uint)Avx2.MoveMask(Avx2.SubtractSaturate(currentBlock, fourthByte)));
662674
}
675+
asciibytes += (int)(32 - Popcnt.PopCount((uint)mask));// TODO(Nick Nuon): simplify this expression
676+
663677
}
678+
679+
// important: we just update asciibytes if there was no error.
680+
// We count the number of ascii bytes in the block using just some simple arithmetic
681+
// and no expensive operation:
682+
683+
664684
// There are 2 possible scenarios here : either
665685
// A) it arrives flush on the border, i.e. it doesn't need to be processed further
666686
// B) There are some bytes remaining, in which case we need to call the scalar function

0 commit comments

Comments
 (0)