@@ -715,16 +715,19 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
715
715
return pInputBuffer + inputLength ;
716
716
}
717
717
718
- public unsafe static byte * GetPointerToFirstInvalidByteArm64 ( byte * pInputBuffer , int inputLength )
718
+ public unsafe static byte * GetPointerToFirstInvalidByteArm64 ( byte * pInputBuffer , int inputLength , out int utf16CodeUnitCountAdjustment , out int scalarCountAdjustment )
719
719
{
720
720
int processedLength = 0 ;
721
721
int TempUtf16CodeUnitCountAdjustment = 0 ;
722
722
int TempScalarCountAdjustment = 0 ;
723
723
724
- int utf16CodeUnitCountAdjustment = 0 , scalarCountAdjustment = 0 ;
724
+ int TailScalarCodeUnitCountAdjustment = 0 ;
725
+ int TailUtf16CodeUnitCountAdjustment = 0 ;
725
726
726
727
if ( pInputBuffer == null || inputLength <= 0 )
727
728
{
729
+ utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment ;
730
+ scalarCountAdjustment = TempScalarCountAdjustment ;
728
731
return pInputBuffer ;
729
732
}
730
733
if ( inputLength > 128 )
@@ -793,18 +796,32 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
793
796
Vector128 < byte > v0f = Vector128 . Create ( ( byte ) 0x0F ) ;
794
797
Vector128 < byte > v80 = Vector128 . Create ( ( byte ) 0x80 ) ;
795
798
// Performance note: we could process 64 bytes at a time for better speed in some cases.
799
+ int start_point = processedLength ;
800
+
801
+ // The block goes from processedLength to processedLength/16*16.
802
+ int asciibytes = 0 ; // number of ascii bytes in the block (could also be called n1)
803
+ int contbytes = 0 ; // number of continuation bytes in the block
804
+ int n4 = 0 ; // number of 4-byte sequences that start in this block
805
+
796
806
for ( ; processedLength + 16 <= inputLength ; processedLength += 16 )
797
807
{
798
808
799
809
Vector128 < byte > currentBlock = AdvSimd . LoadVector128 ( pInputBuffer + processedLength ) ;
800
810
801
- if ( AdvSimd . Arm64 . MaxAcross ( currentBlock ) . ToScalar ( ) > 127 )
811
+ if ( AdvSimd . Arm64 . MaxAcross ( currentBlock ) . ToScalar ( ) <= 127 )
802
812
{
803
813
// We have an ASCII block, no need to process it, but
804
814
// we need to check if the previous block was incomplete.
805
815
if ( AdvSimd . Arm64 . MaxAcross ( prevIncomplete ) . ToScalar ( ) != 0 )
806
816
{
807
- return SimdUnicode . UTF8 . RewindAndValidateWithErrors ( processedLength , pInputBuffer + processedLength , inputLength - processedLength , ref utf16CodeUnitCountAdjustment , ref scalarCountAdjustment ) ;
817
+ int totalbyteasciierror = processedLength - start_point ;
818
+ var ( utfadjustasciierror , scalaradjustasciierror ) = CalculateN2N3FinalSIMDAdjustments ( asciibytes , n4 , contbytes , totalbyteasciierror ) ;
819
+
820
+ utf16CodeUnitCountAdjustment = utfadjustasciierror ;
821
+ scalarCountAdjustment = scalaradjustasciierror ;
822
+
823
+ int off = processedLength >= 3 ? processedLength - 3 : processedLength ;
824
+ return SimdUnicode . UTF8 . RewindAndValidateWithErrors ( off , pInputBuffer + off , inputLength - off , ref utf16CodeUnitCountAdjustment , ref scalarCountAdjustment ) ;
808
825
}
809
826
prevIncomplete = Vector128 < byte > . Zero ;
810
827
}
@@ -829,52 +846,76 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
829
846
// hardware:
830
847
if ( AdvSimd . Arm64 . MaxAcross ( Vector128 . AsUInt32 ( error ) ) . ToScalar ( ) != 0 )
831
848
{
832
- return SimdUnicode . UTF8 . RewindAndValidateWithErrors ( processedLength , pInputBuffer + processedLength , inputLength - processedLength , ref utf16CodeUnitCountAdjustment , ref scalarCountAdjustment ) ;
849
+ int off = processedLength > 32 ? processedLength - 32 : processedLength ; // this does not back up if processedLength <= 32
850
+ byte * invalidBytePointer = SimdUnicode . UTF8 . RewindAndValidateWithErrors ( off , pInputBuffer + processedLength , inputLength - processedLength , ref TailUtf16CodeUnitCountAdjustment , ref TailScalarCodeUnitCountAdjustment ) ;
851
+ utf16CodeUnitCountAdjustment = TailUtf16CodeUnitCountAdjustment ;
852
+ scalarCountAdjustment = TailScalarCodeUnitCountAdjustment ;
853
+
854
+ int totalbyteasciierror = processedLength - start_point ;
855
+ var ( utfadjustasciierror , scalaradjustasciierror ) = calculateErrorPathadjust ( start_point , processedLength , pInputBuffer , asciibytes , n4 , contbytes ) ;
856
+
857
+ utf16CodeUnitCountAdjustment += utfadjustasciierror ;
858
+ scalarCountAdjustment += scalaradjustasciierror ;
859
+
860
+ return invalidBytePointer ;
833
861
}
834
862
prevIncomplete = AdvSimd . SubtractSaturate ( currentBlock , maxValue ) ;
863
+ if ( AdvSimd . Arm64 . MaxAcross ( Vector128 . AsUInt32 ( prevIncomplete ) ) . ToScalar ( ) != 0 )
864
+ {
865
+ // We have an unterminated sequence.
866
+ var ( totalbyteadjustment , i , tempascii , tempcont , tempn4 ) = adjustmentFactor ( pInputBuffer + processedLength + 32 ) ;
867
+ processedLength -= i ;
868
+ n4 += tempn4 ;
869
+ contbytes += tempcont ;
870
+ }
871
+ Vector128 < sbyte > largestcont = Vector128 . Create ( ( sbyte ) - 65 ) ; // -65 => 0b10111111
872
+ contbytes += 16 - AdvSimd . Arm64 . AddAcross ( AdvSimd . CompareGreaterThan ( Vector128 . AsSByte ( currentBlock ) , largestcont ) ) . ToScalar ( ) ;
873
+ Vector128 < byte > fourthByteMinusOne = Vector128 . Create ( ( byte ) ( 0b11110000u - 1 ) ) ;
874
+ n4 += ( int ) ( AdvSimd . Arm64 . AddAcross ( AdvSimd . SubtractSaturate ( currentBlock , fourthByteMinusOne ) ) . ToScalar ( ) ) ;
835
875
}
876
+
877
+ asciibytes -= ( int ) AdvSimd . Arm64 . AddAcross ( AdvSimd . CompareGreaterThanOrEqual ( currentBlock , v80 ) ) . ToScalar ( ) ;
878
+
836
879
}
880
+
881
+ int totalbyte = processedLength - start_point ;
882
+ var ( utf16adjust , scalaradjust ) = CalculateN2N3FinalSIMDAdjustments ( asciibytes , n4 , contbytes , totalbyte ) ;
883
+
884
+ TempUtf16CodeUnitCountAdjustment = utf16adjust ;
885
+ TempScalarCountAdjustment = scalaradjust ;
886
+
837
887
}
838
888
}
839
889
// We have processed all the blocks using SIMD, we need to process the remaining bytes.
840
-
841
890
// Process the remaining bytes with the scalar function
891
+
892
+ // worst possible case is 4 bytes, where we need to backtrack 3 bytes
893
+ // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx <== we might be pointing at the last byte
842
894
if ( processedLength < inputLength )
843
895
{
844
- // We need to possibly backtrack to the start of the last code point
845
- // worst possible case is 4 bytes, where we need to backtrack 3 bytes
846
- // 11110xxxx 10xxxxxx 10xxxxxx 10xxxxxx <== we might be pointing at the last byte
847
- if ( processedLength > 0 && ( sbyte ) pInputBuffer [ processedLength ] <= - 65 )
848
- {
849
- processedLength -= 1 ;
850
- if ( processedLength > 0 && ( sbyte ) pInputBuffer [ processedLength ] <= - 65 )
851
- {
852
- processedLength -= 1 ;
853
- if ( processedLength > 0 && ( sbyte ) pInputBuffer [ processedLength ] <= - 65 )
854
- {
855
- processedLength -= 1 ;
856
- }
857
- }
858
- }
859
- int TailScalarCodeUnitCountAdjustment = 0 ;
860
- int TailUtf16CodeUnitCountAdjustment = 0 ;
861
- byte * invalidBytePointer = SimdUnicode . UTF8 . GetPointerToFirstInvalidByteScalar ( pInputBuffer + processedLength , inputLength - processedLength , out TailUtf16CodeUnitCountAdjustment , out TailScalarCodeUnitCountAdjustment ) ;
896
+
897
+ byte * invalidBytePointer = SimdUnicode . UTF8 . RewindAndValidateWithErrors ( 32 , pInputBuffer + processedLength , inputLength - processedLength , ref TailUtf16CodeUnitCountAdjustment , ref TailScalarCodeUnitCountAdjustment ) ;
862
898
if ( invalidBytePointer != pInputBuffer + inputLength )
863
899
{
900
+ utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment + TailUtf16CodeUnitCountAdjustment ;
901
+ scalarCountAdjustment = TempScalarCountAdjustment + TailScalarCodeUnitCountAdjustment ;
902
+
864
903
// An invalid byte was found by the scalar function
865
904
return invalidBytePointer ;
866
905
}
867
906
}
907
+ utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment + TailUtf16CodeUnitCountAdjustment ;
908
+ scalarCountAdjustment = TempScalarCountAdjustment + TailScalarCodeUnitCountAdjustment ;
868
909
869
910
return pInputBuffer + inputLength ;
870
911
}
871
912
public unsafe static byte * GetPointerToFirstInvalidByte ( byte * pInputBuffer , int inputLength , out int Utf16CodeUnitCountAdjustment , out int ScalarCodeUnitCountAdjustment )
872
913
{
873
914
874
- // if (AdvSimd.Arm64.IsSupported)
875
- // {
876
- // return GetPointerToFirstInvalidByteArm64(pInputBuffer, inputLength);
877
- // }
915
+ if ( AdvSimd . Arm64 . IsSupported )
916
+ {
917
+ return GetPointerToFirstInvalidByteArm64 ( pInputBuffer , inputLength , out Utf16CodeUnitCountAdjustment , out ScalarCodeUnitCountAdjustment ) ;
918
+ }
878
919
if ( Avx2 . IsSupported )
879
920
{
880
921
return GetPointerToFirstInvalidByteAvx2 ( pInputBuffer , inputLength , out Utf16CodeUnitCountAdjustment , out ScalarCodeUnitCountAdjustment ) ;
0 commit comments