Skip to content

Commit 3ae4e31

Browse files
committed
silencing more warnings, and adding an Arm64 skeleton
1 parent f037b96 commit 3ae4e31

File tree

3 files changed

+99
-84
lines changed

3 files changed

+99
-84
lines changed

src/UTF8.cs

Lines changed: 69 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -813,7 +813,6 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
813813
if (processedLength == 0)
814814
{
815815
invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(0, pInputBuffer + processedLength, inputLength - processedLength);
816-
817816
}
818817
else
819818
{
@@ -827,9 +826,7 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
827826
{
828827
addCounters(pInputBuffer + processedLength, invalidBytePointer, ref asciibytes, ref n4, ref contbytes);
829828
}
830-
831829
int total_bytes_processed = (int)(invalidBytePointer - (pInputBuffer + start_point));
832-
833830
(utf16CodeUnitCountAdjustment, scalarCountAdjustment) = CalculateN2N3FinalSIMDAdjustments(asciibytes, n4, contbytes, total_bytes_processed);
834831
return invalidBytePointer;
835832
}
@@ -855,16 +852,13 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
855852
}
856853
else
857854
{
858-
859855
invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(processedLength - 3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3);
860856

861857
}
862858
if (invalidBytePointer != pInputBuffer + inputLength)
863859
{
864-
865860
if (invalidBytePointer < pInputBuffer + processedLength)
866861
{
867-
868862
removeCounters(invalidBytePointer, pInputBuffer + processedLength, ref asciibytes, ref n4, ref contbytes);
869863
}
870864
else
@@ -891,16 +885,10 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
891885
public unsafe static byte* GetPointerToFirstInvalidByteArm64(byte* pInputBuffer, int inputLength, out int utf16CodeUnitCountAdjustment, out int scalarCountAdjustment)
892886
{
893887
int processedLength = 0;
894-
int TempUtf16CodeUnitCountAdjustment = 0;
895-
int TempScalarCountAdjustment = 0;
896-
897-
int TailScalarCodeUnitCountAdjustment = 0;
898-
int TailUtf16CodeUnitCountAdjustment = 0;
899-
900888
if (pInputBuffer == null || inputLength <= 0)
901889
{
902-
utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment;
903-
scalarCountAdjustment = TempScalarCountAdjustment;
890+
utf16CodeUnitCountAdjustment = 0;
891+
scalarCountAdjustment = 0;
904892
return pInputBuffer;
905893
}
906894
if (inputLength > 128)
@@ -986,14 +974,20 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
986974
// we need to check if the previous block was incomplete.
987975
if (AdvSimd.Arm64.MaxAcross(prevIncomplete).ToScalar() != 0)
988976
{
989-
int totalbyteasciierror = processedLength - start_point;
990-
var (utfadjustasciierror, scalaradjustasciierror) = CalculateN2N3FinalSIMDAdjustments(asciibytes, n4, contbytes, totalbyteasciierror);
991-
992-
utf16CodeUnitCountAdjustment = utfadjustasciierror;
993-
scalarCountAdjustment = scalaradjustasciierror;
994-
995977
int off = processedLength >= 3 ? processedLength - 3 : processedLength;
996-
return SimdUnicode.UTF8.RewindAndValidateWithErrors(off, pInputBuffer + off, inputLength - off, ref utf16CodeUnitCountAdjustment, ref scalarCountAdjustment);
978+
byte* invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(16 - 3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3);
979+
// So the code is correct up to invalidBytePointer
980+
if (invalidBytePointer < pInputBuffer + processedLength)
981+
{
982+
removeCounters(invalidBytePointer, pInputBuffer + processedLength, ref asciibytes, ref n4, ref contbytes);
983+
}
984+
else
985+
{
986+
addCounters(pInputBuffer + processedLength, invalidBytePointer, ref asciibytes, ref n4, ref contbytes);
987+
}
988+
int totalbyteasciierror = processedLength - start_point;
989+
(utf16CodeUnitCountAdjustment, scalarCountAdjustment) = CalculateN2N3FinalSIMDAdjustments(asciibytes, n4, contbytes, totalbyteasciierror);
990+
return invalidBytePointer;
997991
}
998992
prevIncomplete = Vector128<byte>.Zero;
999993
}
@@ -1019,17 +1013,25 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
10191013
// hardware:
10201014
if (AdvSimd.Arm64.MaxAcross(Vector128.AsUInt32(error)).ToScalar() != 0)
10211015
{
1022-
int off = processedLength >= 3 ? processedLength - 3 : processedLength;
1023-
byte* invalidBytePointer = SimdUnicode.UTF8.RewindAndValidateWithErrors(off, pInputBuffer + processedLength, inputLength - processedLength, ref TailUtf16CodeUnitCountAdjustment, ref TailScalarCodeUnitCountAdjustment);
1024-
utf16CodeUnitCountAdjustment = TailUtf16CodeUnitCountAdjustment;
1025-
scalarCountAdjustment = TailScalarCodeUnitCountAdjustment;
1026-
1027-
int totalbyteasciierror = processedLength - start_point;
1028-
var (utfadjustasciierror, scalaradjustasciierror) = calculateErrorPathadjust(start_point, processedLength, pInputBuffer, asciibytes, n4, contbytes);
1029-
1030-
utf16CodeUnitCountAdjustment += utfadjustasciierror;
1031-
scalarCountAdjustment += scalaradjustasciierror;
1032-
1016+
byte* invalidBytePointer;
1017+
if (processedLength == 0)
1018+
{
1019+
invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(0, pInputBuffer + processedLength, inputLength - processedLength);
1020+
}
1021+
else
1022+
{
1023+
invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(processedLength - 3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3);
1024+
}
1025+
if (invalidBytePointer < pInputBuffer + processedLength)
1026+
{
1027+
removeCounters(invalidBytePointer, pInputBuffer + processedLength, ref asciibytes, ref n4, ref contbytes);
1028+
}
1029+
else
1030+
{
1031+
addCounters(pInputBuffer + processedLength, invalidBytePointer, ref asciibytes, ref n4, ref contbytes);
1032+
}
1033+
int total_bytes_processed = (int)(invalidBytePointer - (pInputBuffer + start_point));
1034+
(utf16CodeUnitCountAdjustment, scalarCountAdjustment) = CalculateN2N3FinalSIMDAdjustments(asciibytes, n4, contbytes, total_bytes_processed);
10331035
return invalidBytePointer;
10341036
}
10351037
prevIncomplete = AdvSimd.SubtractSaturate(currentBlock, maxValue);
@@ -1041,34 +1043,44 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
10411043
asciibytes -= (sbyte)AdvSimd.Arm64.AddAcross(AdvSimd.CompareLessThan(currentBlock, v80)).ToScalar();
10421044
}
10431045

1044-
int totalbyte = processedLength - start_point;
1045-
var (utf16adjust, scalaradjust) = CalculateN2N3FinalSIMDAdjustments(asciibytes, n4, contbytes, totalbyte);
1046-
1047-
TempUtf16CodeUnitCountAdjustment = utf16adjust;
1048-
TempScalarCountAdjustment = scalaradjust;
1049-
1050-
}
1051-
}
1052-
// We have processed all the blocks using SIMD, we need to process the remaining bytes.
1053-
// Process the remaining bytes with the scalar function
1054-
// worst possible case is 4 bytes, where we need to backtrack 3 bytes
1055-
// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx <== we might be pointing at the last byte
1056-
if (processedLength < inputLength)
1057-
{
1058-
1059-
byte* invalidBytePointer = SimdUnicode.UTF8.RewindAndValidateWithErrors(processedLength, pInputBuffer + processedLength, inputLength - processedLength, ref TailUtf16CodeUnitCountAdjustment, ref TailScalarCodeUnitCountAdjustment);
1060-
if (invalidBytePointer != pInputBuffer + inputLength)
1061-
{
1062-
utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment + TailUtf16CodeUnitCountAdjustment;
1063-
scalarCountAdjustment = TempScalarCountAdjustment + TailScalarCodeUnitCountAdjustment;
1046+
// We may still have an error.
1047+
if (processedLength < inputLength || !Avx2.TestZ(prevIncomplete, prevIncomplete))
1048+
{
1049+
byte* invalidBytePointer;
1050+
if (processedLength == 0)
1051+
{
1052+
invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(0, pInputBuffer + processedLength, inputLength - processedLength);
1053+
}
1054+
else
1055+
{
1056+
invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(processedLength - 3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3);
10641057

1065-
// An invalid byte was found by the scalar function
1066-
return invalidBytePointer;
1058+
}
1059+
if (invalidBytePointer != pInputBuffer + inputLength)
1060+
{
1061+
if (invalidBytePointer < pInputBuffer + processedLength)
1062+
{
1063+
removeCounters(invalidBytePointer, pInputBuffer + processedLength, ref asciibytes, ref n4, ref contbytes);
1064+
}
1065+
else
1066+
{
1067+
addCounters(pInputBuffer + processedLength, invalidBytePointer, ref asciibytes, ref n4, ref contbytes);
1068+
}
1069+
int total_bytes_processed = (int)(invalidBytePointer - (pInputBuffer + start_point));
1070+
(utf16CodeUnitCountAdjustment, scalarCountAdjustment) = CalculateN2N3FinalSIMDAdjustments(asciibytes, n4, contbytes, total_bytes_processed);
1071+
return invalidBytePointer;
1072+
}
1073+
else
1074+
{
1075+
addCounters(pInputBuffer + processedLength, invalidBytePointer, ref asciibytes, ref n4, ref contbytes);
1076+
}
1077+
}
1078+
int final_total_bytes_processed = inputLength - start_point;
1079+
(utf16CodeUnitCountAdjustment, scalarCountAdjustment) = CalculateN2N3FinalSIMDAdjustments(asciibytes, n4, contbytes, final_total_bytes_processed);
1080+
return pInputBuffer + inputLength;
10671081
}
10681082
}
1069-
utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment + TailUtf16CodeUnitCountAdjustment;
1070-
scalarCountAdjustment = TempScalarCountAdjustment + TailScalarCodeUnitCountAdjustment;
1071-
return pInputBuffer + inputLength;
1083+
return GetPointerToFirstInvalidByteScalar(pInputBuffer + processedLength, inputLength - processedLength, out utf16CodeUnitCountAdjustment, out scalarCountAdjustment);
10721084
}
10731085

10741086
private static unsafe void removeCounters(byte* start, byte* end, ref int asciibytes, ref int n4, ref int contbytes)

test/AsciiTest.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ public void TestASCIIGenerator()
112112
// Assertion or check to ensure all sequences were valid ASCII
113113
if (validSequencesCount != NUM_TRIALS)
114114
{
115-
throw new Exception($"Invalid ASCII sequences were generated for {length}-byte sequences!");
115+
throw new ArgumentException($"Invalid ASCII sequences were generated for {length}-byte sequences!");
116116
}
117117
}
118118
}
@@ -136,7 +136,7 @@ public void TestNoErrorGetIndexOfFirstNonAsciiByte()
136136
nuint result = SimdUnicode.Ascii.GetIndexOfFirstNonAsciiByte(pAscii, (nuint)ascii.Length);
137137
if (result != (nuint)ascii.Length)
138138
{
139-
throw new Exception($"Unexpected non-ASCII character found at index {result}");
139+
throw new ArgumentException($"Unexpected non-ASCII character found at index {result}");
140140
}
141141
}
142142
}
@@ -166,7 +166,7 @@ public void TestErrorGetIndexOfFirstNonAsciiByte()
166166
nuint result = SimdUnicode.Ascii.GetIndexOfFirstNonAsciiByte(pAscii, (nuint)ascii.Length);
167167
if (result != (nuint)i)
168168
{
169-
throw new Exception($"Expected non-ASCII character at index {i}, but found at index {result}");
169+
throw new ArgumentException($"Expected non-ASCII character at index {i}, but found at index {result}");
170170
}
171171
}
172172
}

0 commit comments

Comments
 (0)