Skip to content

Commit 197c8c1

Browse files
committed
test pass.
1 parent 3ae4e31 commit 197c8c1

File tree

2 files changed

+57
-7
lines changed

2 files changed

+57
-7
lines changed

benchmark/Benchmark.cs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,8 @@ public string GetValue(Summary summary, BenchmarkCase benchmarkCase)
5050
public string ColumnName { get; } = "Speed (GB/s)";
5151
public bool AlwaysShow { get; } = true;
5252
public ColumnCategory Category { get; } = ColumnCategory.Custom;
53-
public int PriorityInCategory { get; } = 0;
54-
public bool IsNumeric { get; } = false;
53+
public int PriorityInCategory { get; }
54+
public bool IsNumeric { get; }
5555
public UnitType UnitType { get; } = UnitType.Dimensionless;
5656
public string Legend { get; } = "The speed in gigabytes per second";
5757
}
@@ -139,6 +139,7 @@ public Config()
139139
@"data/thai.utf8.txt",
140140
@"data/turkish.utf8.txt",
141141
@"data/vietnamese.utf8.txt")]
142+
#pragma warning disable CA1051
142143
public string? FileName;
143144
private byte[] allLinesUtf8 = Array.Empty<byte>();
144145

@@ -155,7 +156,7 @@ private void RunUtf8ValidationBenchmark(byte[] data, Utf8ValidationFunction vali
155156
var res = validationFunction(pUtf8, data.Length);
156157
if (res != pUtf8 + data.Length)
157158
{
158-
throw new Exception("Invalid UTF-8: I expected the pointer to be at the end of the buffer.");
159+
throw new ArgumentException("Invalid UTF-8: I expected the pointer to be at the end of the buffer.");
159160
}
160161
}
161162
}

src/UTF8.cs

Lines changed: 53 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -881,7 +881,12 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
881881
}
882882
return GetPointerToFirstInvalidByteScalar(pInputBuffer + processedLength, inputLength - processedLength, out utf16CodeUnitCountAdjustment, out scalarCountAdjustment);
883883
}
884-
884+
/// <summary>
/// Writes the 16 bytes of <paramref name="v"/> to the console as a hexadecimal string.
/// Despite its name, this method returns nothing; its only effect is the console output.
/// </summary>
/// <param name="v">The 128-bit vector whose bytes are printed.</param>
public static void ToString(Vector128<byte> v)
885+
{
886+
// 16-byte scratch buffer on the stack: one slot per byte lane of the vector.
Span<byte> b = stackalloc byte[16];
887+
v.CopyTo(b);
888+
Console.WriteLine(Convert.ToHexString(b));
889+
}
885890
public unsafe static byte* GetPointerToFirstInvalidByteArm64(byte* pInputBuffer, int inputLength, out int utf16CodeUnitCountAdjustment, out int scalarCountAdjustment)
886891
{
887892
int processedLength = 0;
@@ -974,6 +979,7 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
974979
// we need to check if the previous block was incomplete.
975980
if (AdvSimd.Arm64.MaxAcross(prevIncomplete).ToScalar() != 0)
976981
{
982+
// Console.WriteLine("ASCII block, but previous block was incomplete");
977983
int off = processedLength >= 3 ? processedLength - 3 : processedLength;
978984
byte* invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(16 - 3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3);
979985
// So the code is correct up to invalidBytePointer
@@ -1013,6 +1019,19 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
10131019
// hardware:
10141020
if (AdvSimd.Arm64.MaxAcross(Vector128.AsUInt32(error)).ToScalar() != 0)
10151021
{
1022+
// Console.WriteLine("Error block detected");
1023+
int slown4 = 0;
1024+
int slowcontbytes = 0;
1025+
int slowasciibytes = 0;
1026+
addCounters(pInputBuffer , pInputBuffer + processedLength, ref slowasciibytes, ref slown4, ref slowcontbytes);
1027+
if(slowasciibytes != asciibytes || slown4 != n4 || slowcontbytes != contbytes)
1028+
{
1029+
Console.WriteLine("Error in counting");
1030+
Console.WriteLine($"asciibytes: {asciibytes} {slowasciibytes}");
1031+
Console.WriteLine($"n4: {n4} {slown4}");
1032+
Console.WriteLine($"contbytes: {contbytes} {slowcontbytes}");
1033+
}
1034+
10161035
byte* invalidBytePointer;
10171036
if (processedLength == 0)
10181037
{
@@ -1038,13 +1057,38 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
10381057
Vector128<sbyte> largestcont = Vector128.Create((sbyte)-65); // -65 => 0b10111111
10391058
contbytes += -AdvSimd.Arm64.AddAcross(AdvSimd.CompareLessThanOrEqual(Vector128.AsSByte(currentBlock), largestcont)).ToScalar();
10401059
Vector128<byte> fourthByteMinusOne = Vector128.Create((byte)(0b11110000u - 1));
1041-
n4 += (int)(AdvSimd.Arm64.AddAcross(AdvSimd.SubtractSaturate(currentBlock, fourthByteMinusOne)).ToScalar());
1060+
1061+
int reallyslown4 = 0;
1062+
for(int i = 0; i < 16; i++)
1063+
{
1064+
if(pInputBuffer[processedLength + i] >= 0xF0)
1065+
{
1066+
reallyslown4++;
1067+
}
1068+
}
1069+
var largerthan0f = AdvSimd.CompareGreaterThan(currentBlock, fourthByteMinusOne);
1070+
var largerthan0fones = AdvSimd.And(largerthan0f, Vector128.Create((byte)1));
1071+
var largerthan0fonescount = AdvSimd.Arm64.AddAcross(largerthan0fones).ToScalar();
1072+
if(largerthan0fonescount != reallyslown4)
1073+
{
1074+
Console.WriteLine("***********Error in counting 4-byte sequences");
1075+
ToString(currentBlock);
1076+
ToString(fourthByteMinusOne);
1077+
ToString(AdvSimd.SubtractSaturate(currentBlock, fourthByteMinusOne));
1078+
ToString(AdvSimd.CompareGreaterThan(currentBlock, fourthByteMinusOne));
1079+
Console.WriteLine(((AdvSimd.Arm64.AddAcross(AdvSimd.CompareGreaterThan(currentBlock, fourthByteMinusOne)).ToScalar()^0xff)+1)&0xff);
1080+
1081+
Console.WriteLine(reallyslown4);
1082+
}
1083+
n4 += largerthan0fonescount;
1084+
1085+
//n4 += (int)(((AdvSimd.Arm64.AddAcross(AdvSimd.CompareGreaterThan(currentBlock, fourthByteMinusOne)).ToScalar()^0xff)+1)&0xff);
10421086
}
10431087
asciibytes -= (sbyte)AdvSimd.Arm64.AddAcross(AdvSimd.CompareLessThan(currentBlock, v80)).ToScalar();
10441088
}
10451089

10461090
// We may still have an error.
1047-
if (processedLength < inputLength || !Avx2.TestZ(prevIncomplete, prevIncomplete))
1091+
if (processedLength < inputLength || AdvSimd.Arm64.MaxAcross(prevIncomplete).ToScalar() != 0)
10481092
{
10491093
byte* invalidBytePointer;
10501094
if (processedLength == 0)
@@ -1054,10 +1098,11 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
10541098
else
10551099
{
10561100
invalidBytePointer = SimdUnicode.UTF8.SimpleRewindAndValidateWithErrors(processedLength - 3, pInputBuffer + processedLength - 3, inputLength - processedLength + 3);
1057-
10581101
}
10591102
if (invalidBytePointer != pInputBuffer + inputLength)
10601103
{
1104+
// Console.WriteLine("trailing Error block detected");
1105+
10611106
if (invalidBytePointer < pInputBuffer + processedLength)
10621107
{
10631108
removeCounters(invalidBytePointer, pInputBuffer + processedLength, ref asciibytes, ref n4, ref contbytes);
@@ -1075,11 +1120,15 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
10751120
addCounters(pInputBuffer + processedLength, invalidBytePointer, ref asciibytes, ref n4, ref contbytes);
10761121
}
10771122
}
1123+
//Console.WriteLine("trailing simd method");
1124+
10781125
int final_total_bytes_processed = inputLength - start_point;
10791126
(utf16CodeUnitCountAdjustment, scalarCountAdjustment) = CalculateN2N3FinalSIMDAdjustments(asciibytes, n4, contbytes, final_total_bytes_processed);
10801127
return pInputBuffer + inputLength;
10811128
}
10821129
}
1130+
//Console.WriteLine("trailing scalar method");
1131+
10831132
return GetPointerToFirstInvalidByteScalar(pInputBuffer + processedLength, inputLength - processedLength, out utf16CodeUnitCountAdjustment, out scalarCountAdjustment);
10841133
}
10851134

0 commit comments

Comments
 (0)