@@ -790,7 +790,6 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
790
790
int asciibytes = 0 ; // number of ascii bytes in the block (could also be called n1)
791
791
int contbytes = 0 ; // number of continuation bytes in the block
792
792
int n4 = 0 ; // number of 4-byte sequences that start in this block
793
-
794
793
for ( ; processedLength + 16 <= inputLength ; processedLength += 16 )
795
794
{
796
795
@@ -817,9 +816,10 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
817
816
{
818
817
// Contains non-ASCII characters, we need to do non-trivial processing
819
818
Vector128 < byte > prev1 = AdvSimd . ExtractVector128 ( prevInputBlock , currentBlock , ( byte ) ( 16 - 1 ) ) ;
820
- Vector128 < byte > byte_1_high = Vector128 . Shuffle ( shuf1 , AdvSimd . ShiftRightLogical ( prev1 . AsUInt16 ( ) , 4 ) . AsByte ( ) & v0f ) ;
821
- Vector128 < byte > byte_1_low = Vector128 . Shuffle ( shuf2 , ( prev1 & v0f ) ) ;
822
- Vector128 < byte > byte_2_high = Vector128 . Shuffle ( shuf3 , AdvSimd . ShiftRightLogical ( currentBlock . AsUInt16 ( ) , 4 ) . AsByte ( ) & v0f ) ;
819
+ // Prefer AdvSimd.Arm64.VectorTableLookup over Vector128.Shuffle for the table lookups below.
820
+ Vector128 < byte > byte_1_high = AdvSimd . Arm64 . VectorTableLookup ( shuf1 , AdvSimd . ShiftRightLogical ( prev1 . AsUInt16 ( ) , 4 ) . AsByte ( ) & v0f ) ;
821
+ Vector128 < byte > byte_1_low = AdvSimd . Arm64 . VectorTableLookup ( shuf2 , ( prev1 & v0f ) ) ;
822
+ Vector128 < byte > byte_2_high = AdvSimd . Arm64 . VectorTableLookup ( shuf3 , AdvSimd . ShiftRightLogical ( currentBlock . AsUInt16 ( ) , 4 ) . AsByte ( ) & v0f ) ;
823
823
Vector128 < byte > sc = AdvSimd . And ( AdvSimd . And ( byte_1_high , byte_1_low ) , byte_2_high ) ;
824
824
Vector128 < byte > prev2 = AdvSimd . ExtractVector128 ( prevInputBlock , currentBlock , ( byte ) ( 16 - 2 ) ) ;
825
825
Vector128 < byte > prev3 = AdvSimd . ExtractVector128 ( prevInputBlock , currentBlock , ( byte ) ( 16 - 3 ) ) ;
@@ -849,13 +849,11 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
849
849
}
850
850
prevIncomplete = AdvSimd . SubtractSaturate ( currentBlock , maxValue ) ;
851
851
Vector128 < sbyte > largestcont = Vector128 . Create ( ( sbyte ) - 65 ) ; // -65 => 0b10111111
852
- contbytes += 16 - AdvSimd . Arm64 . AddAcross ( AdvSimd . CompareGreaterThan ( Vector128 . AsSByte ( currentBlock ) , largestcont ) ) . ToScalar ( ) ;
852
+ contbytes += - AdvSimd . Arm64 . AddAcross ( AdvSimd . CompareLessThanOrEqual ( Vector128 . AsSByte ( currentBlock ) , largestcont ) ) . ToScalar ( ) ;
853
853
Vector128 < byte > fourthByteMinusOne = Vector128 . Create ( ( byte ) ( 0b11110000u - 1 ) ) ;
854
854
n4 += ( int ) ( AdvSimd . Arm64 . AddAcross ( AdvSimd . SubtractSaturate ( currentBlock , fourthByteMinusOne ) ) . ToScalar ( ) ) ;
855
855
}
856
-
857
- asciibytes -= ( int ) AdvSimd . Arm64 . AddAcross ( AdvSimd . CompareGreaterThanOrEqual ( currentBlock , v80 ) ) . ToScalar ( ) ;
858
-
856
+ asciibytes -= ( sbyte ) AdvSimd . Arm64 . AddAcross ( AdvSimd . CompareLessThan ( currentBlock , v80 ) ) . ToScalar ( ) ;
859
857
}
860
858
861
859
int totalbyte = processedLength - start_point ;
@@ -886,7 +884,6 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust(
886
884
}
887
885
utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment + TailUtf16CodeUnitCountAdjustment ;
888
886
scalarCountAdjustment = TempScalarCountAdjustment + TailScalarCodeUnitCountAdjustment ;
889
-
890
887
return pInputBuffer + inputLength ;
891
888
}
892
889
public unsafe static byte * GetPointerToFirstInvalidByte ( byte * pInputBuffer , int inputLength , out int Utf16CodeUnitCountAdjustment , out int ScalarCodeUnitCountAdjustment )
0 commit comments