Skip to content

Commit 6ff99ee

Browse files
authored
Merge pull request #37 from simdutf/daniel_some_clean
AVX2 performance cleaning + fix
2 parents 5a99bb2 + 44b89a9 commit 6ff99ee

File tree

7 files changed

+838
-929
lines changed

7 files changed

+838
-929
lines changed

README.md

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,31 @@ This is a fast C# library to validate UTF-8 strings.
66

77
## Motivation
88

9-
We seek to speed up the `Utf8Utility.GetPointerToFirstInvalidByte` function. Using the algorithm used by Node.js, Oracle GraalVM and other important systems.
10-
11-
- John Keiser, Daniel Lemire, [Validating UTF-8 In Less Than One Instruction Per Byte](https://arxiv.org/abs/2010.03090), Software: Practice and Experience 51 (5), 2021
9+
We seek to speed up the `Utf8Utility.GetPointerToFirstInvalidByte` function from the C# runtime library.
10+
[The function is private in the Microsoft Runtime](https://github.com/dotnet/runtime/blob/4d709cd12269fcbb3d0fccfb2515541944475954/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs), but we can expose it manually.
1211

13-
The algorithm in question is part of popular JavaScript runtimes such as Node.js and Bun, [by PHP](https://github.com/php/php-src/blob/90e0ce7f0db99767c58dc21e4213c0f8763f657a/ext/mbstring/mbstring.c#L5270), by Oracle GraalVM and many important systems.
12+
Specifically, we provide the function `SimdUnicode.UTF8.GetPointerToFirstInvalidByte` which is a faster
13+
drop-in replacement:
14+
```cs
15+
// Returns &inputBuffer[inputLength] if the input buffer is valid.
16+
/// <summary>
17+
/// Given an input buffer <paramref name="pInputBuffer"/> of byte length <paramref name="inputLength"/>,
18+
/// returns a pointer to where the first invalid data appears in <paramref name="pInputBuffer"/>.
19+
/// The parameter <paramref name="Utf16CodeUnitCountAdjustment"/> is set according to the content of the valid UTF-8 characters encountered, counting -1 for each 2-byte character and -2 for each 3-byte or 4-byte character.
20+
/// The parameter <paramref name="ScalarCodeUnitCountAdjustment"/> is set according to the content of the valid UTF-8 characters encountered, counting -1 for each 4-byte character.
21+
/// </summary>
22+
/// <remarks>
23+
/// Returns a pointer to the end of <paramref name="pInputBuffer"/> if the buffer is well-formed.
24+
/// </remarks>
25+
public unsafe static byte* GetPointerToFirstInvalidByte(byte* pInputBuffer, int inputLength, out int Utf16CodeUnitCountAdjustment, out int ScalarCodeUnitCountAdjustment);
26+
```
1427

15-
[The function is private in the Microsoft Runtime](https://github.com/dotnet/runtime/blob/4d709cd12269fcbb3d0fccfb2515541944475954/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs), but we can expose it manually.
28+
The function uses advanced instructions (SIMD) on 64-bit ARM and x64 processors, but falls back on a
29+
conventional implementation on other systems. We provide extensive tests and benchmarks.
1630

31+
We apply the algorithm used by Node.js, Bun, Oracle GraalVM, the PHP interpreter and other important systems. The algorithm is described in the following article:
1732

33+
- John Keiser, Daniel Lemire, [Validating UTF-8 In Less Than One Instruction Per Byte](https://arxiv.org/abs/2010.03090), Software: Practice and Experience 51 (5), 2021
1834

1935

2036
## Requirements
@@ -30,6 +46,11 @@ dotnet test
3046

3147
To see which tests are running, we recommend setting the verbosity level:
3248

49+
```
50+
dotnet test -v=normal
51+
```
52+
53+
More details could be useful:
3354
```
3455
dotnet test -v d
3556
```
@@ -44,7 +65,7 @@ To run specific tests, it is helpful to use the filter parameter:
4465

4566

4667
```
47-
dotnet test --filter TooShortErrorAVX
68+
dotnet test --filter TooShortErrorAvx2
4869
```
4970

5071
Or to target specific categories:
@@ -89,7 +110,6 @@ dotnet build
89110
We recommend you use `dotnet format`. E.g.,
90111

91112
```
92-
cd test
93113
dotnet format
94114
```
95115

@@ -115,6 +135,7 @@ You can print the content of a vector register like so:
115135
## Performance tips
116136

117137
- Be careful: `Vector128.Shuffle` is not the same as `Ssse3.Shuffle` nor is `Vector128.Shuffle` the same as `Avx2.Shuffle`. Prefer the latter.
138+
- Similarly, `Vector128.Shuffle` is not the same as `AdvSimd.Arm64.VectorTableLookup`; use the latter.
118139

119140
## More reading
120141

benchmark/Benchmark.cs

Lines changed: 50 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,17 @@ public class Speed : IColumn
2727
{
2828
public string GetValue(Summary summary, BenchmarkCase benchmarkCase)
2929
{
30+
if (summary is null || benchmarkCase is null || benchmarkCase.Parameters is null)
31+
{
32+
return "N/A";
33+
}
3034
var ourReport = summary.Reports.First(x => x.BenchmarkCase.Equals(benchmarkCase));
3135
var fileName = (string)benchmarkCase.Parameters["FileName"];
32-
long length = new System.IO.FileInfo(fileName).Length;
33-
if (ourReport.ResultStatistics is null)
36+
if (ourReport is null || ourReport.ResultStatistics is null)
3437
{
3538
return "N/A";
3639
}
40+
long length = new System.IO.FileInfo(fileName).Length;
3741
var mean = ourReport.ResultStatistics.Mean;
3842
return $"{(length / ourReport.ResultStatistics.Mean):#####.00}";
3943
}
@@ -46,8 +50,8 @@ public string GetValue(Summary summary, BenchmarkCase benchmarkCase)
4650
public string ColumnName { get; } = "Speed (GB/s)";
4751
public bool AlwaysShow { get; } = true;
4852
public ColumnCategory Category { get; } = ColumnCategory.Custom;
49-
public int PriorityInCategory { get; } = 0;
50-
public bool IsNumeric { get; } = false;
53+
public int PriorityInCategory { get; }
54+
public bool IsNumeric { get; }
5155
public UnitType UnitType { get; } = UnitType.Dimensionless;
5256
public string Legend { get; } = "The speed in gigabytes per second";
5357
}
@@ -57,8 +61,8 @@ public string GetValue(Summary summary, BenchmarkCase benchmarkCase)
5761
[Config(typeof(Config))]
5862
public class RealDataBenchmark
5963
{
60-
61-
private class Config : ManualConfig
64+
#pragma warning disable CA1812
65+
private sealed class Config : ManualConfig
6266
{
6367
public Config()
6468
{
@@ -67,6 +71,7 @@ public Config()
6771

6872
if (RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
6973
{
74+
#pragma warning disable CA1303
7075
Console.WriteLine("ARM64 system detected.");
7176
AddFilter(new AnyCategoriesFilter(["arm64", "scalar", "runtime"]));
7277

@@ -75,21 +80,25 @@ public Config()
7580
{
7681
if (Vector512.IsHardwareAccelerated && System.Runtime.Intrinsics.X86.Avx512Vbmi.IsSupported)
7782
{
83+
#pragma warning disable CA1303
7884
Console.WriteLine("X64 system detected (Intel, AMD,...) with AVX-512 support.");
7985
AddFilter(new AnyCategoriesFilter(["avx512", "avx", "sse", "scalar", "runtime"]));
8086
}
8187
else if (Avx2.IsSupported)
8288
{
89+
#pragma warning disable CA1303
8390
Console.WriteLine("X64 system detected (Intel, AMD,...) with AVX2 support.");
8491
AddFilter(new AnyCategoriesFilter(["avx", "sse", "scalar", "runtime"]));
8592
}
8693
else if (Ssse3.IsSupported)
8794
{
95+
#pragma warning disable CA1303
8896
Console.WriteLine("X64 system detected (Intel, AMD,...) with Sse4.2 support.");
8997
AddFilter(new AnyCategoriesFilter(["sse", "scalar", "runtime"]));
9098
}
9199
else
92100
{
101+
#pragma warning disable CA1303
93102
Console.WriteLine("X64 system detected (Intel, AMD,...) without relevant SIMD support.");
94103
AddFilter(new AnyCategoriesFilter(["scalar", "runtime"]));
95104
}
@@ -130,14 +139,15 @@ public Config()
130139
@"data/thai.utf8.txt",
131140
@"data/turkish.utf8.txt",
132141
@"data/vietnamese.utf8.txt")]
142+
#pragma warning disable CA1051
133143
public string? FileName;
134-
public byte[] allLinesUtf8 = new byte[0];
144+
private byte[] allLinesUtf8 = Array.Empty<byte>();
135145

136146

137147
public unsafe delegate byte* Utf8ValidationFunction(byte* pUtf8, int length);
138148
public unsafe delegate byte* DotnetRuntimeUtf8ValidationFunction(byte* pUtf8, int length, out int utf16CodeUnitCountAdjustment, out int scalarCountAdjustment);
139149

140-
public void RunUtf8ValidationBenchmark(byte[] data, Utf8ValidationFunction validationFunction)
150+
private void RunUtf8ValidationBenchmark(byte[] data, Utf8ValidationFunction validationFunction)
141151
{
142152
unsafe
143153
{
@@ -146,13 +156,13 @@ public void RunUtf8ValidationBenchmark(byte[] data, Utf8ValidationFunction valid
146156
var res = validationFunction(pUtf8, data.Length);
147157
if (res != pUtf8 + data.Length)
148158
{
149-
throw new Exception("Invalid UTF-8: I expected the pointer to be at the end of the buffer.");
159+
throw new ArgumentException("Invalid UTF-8: I expected the pointer to be at the end of the buffer.");
150160
}
151161
}
152162
}
153163
}
154164

155-
public void RunDotnetRuntimeUtf8ValidationBenchmark(byte[] data, DotnetRuntimeUtf8ValidationFunction validationFunction)
165+
private void RunDotnetRuntimeUtf8ValidationBenchmark(byte[] data, DotnetRuntimeUtf8ValidationFunction validationFunction)
156166
{
157167
unsafe
158168
{
@@ -183,20 +193,17 @@ public unsafe void SIMDUtf8ValidationRealData()
183193
{
184194
if (allLinesUtf8 != null)
185195
{
186-
// RunUtf8ValidationBenchmark(allLinesUtf8, SimdUnicode.UTF8.GetPointerToFirstInvalidByte);
196+
RunUtf8ValidationBenchmark(allLinesUtf8, (byte* pInputBuffer, int inputLength) =>
197+
{
198+
int dummyUtf16CodeUnitCountAdjustment, dummyScalarCountAdjustment;
199+
// Call the method with additional out parameters within the lambda.
200+
// You must handle these additional out parameters inside the lambda, as they cannot be passed back through the delegate.
201+
return SimdUnicode.UTF8.GetPointerToFirstInvalidByte(pInputBuffer, inputLength, out dummyUtf16CodeUnitCountAdjustment, out dummyScalarCountAdjustment);
202+
});
187203
}
188204
}
189205

190206
[Benchmark]
191-
// [BenchmarkCategory("scalar")]
192-
// public unsafe void Utf8ValidationRealDataScalar()
193-
// {
194-
// if (allLinesUtf8 != null)
195-
// {
196-
// RunUtf8ValidationBenchmark(allLinesUtf8, SimdUnicode.UTF8.GetPointerToFirstInvalidByteScalar);
197-
// }
198-
// }
199-
200207
[BenchmarkCategory("scalar")]
201208
public unsafe void Utf8ValidationRealDataScalar()
202209
{
@@ -213,45 +220,48 @@ public unsafe void Utf8ValidationRealDataScalar()
213220
}
214221
}
215222

216-
217223
[Benchmark]
218224
[BenchmarkCategory("arm64")]
219225
public unsafe void SIMDUtf8ValidationRealDataArm64()
220226
{
221227
if (allLinesUtf8 != null)
222228
{
223-
RunUtf8ValidationBenchmark(allLinesUtf8, SimdUnicode.UTF8.GetPointerToFirstInvalidByteArm64);
229+
RunUtf8ValidationBenchmark(allLinesUtf8, (byte* pInputBuffer, int inputLength) =>
230+
{
231+
int dummyUtf16CodeUnitCountAdjustment, dummyScalarCountAdjustment;
232+
// Call the method with additional out parameters within the lambda.
233+
// You must handle these additional out parameters inside the lambda, as they cannot be passed back through the delegate.
234+
return SimdUnicode.UTF8.GetPointerToFirstInvalidByteArm64(pInputBuffer, inputLength, out dummyUtf16CodeUnitCountAdjustment, out dummyScalarCountAdjustment);
235+
});
224236
}
237+
225238
}
226-
// [Benchmark]
227-
// [BenchmarkCategory("avx")]
228-
// public unsafe void SIMDUtf8ValidationRealDataAvx2()
229-
// {
230-
// if (allLinesUtf8 != null)
231-
// {
232-
// RunUtf8ValidationBenchmark(allLinesUtf8, SimdUnicode.UTF8.GetPointerToFirstInvalidByteAvx2);
233-
// }
234-
// }
239+
235240
[Benchmark]
236-
[BenchmarkCategory("sse")]
237-
public unsafe void SIMDUtf8ValidationRealDataSse()
241+
[BenchmarkCategory("avx")]
242+
public unsafe void SIMDUtf8ValidationRealDataAvx2()
238243
{
239244
if (allLinesUtf8 != null)
240245
{
241-
RunUtf8ValidationBenchmark(allLinesUtf8, SimdUnicode.UTF8.GetPointerToFirstInvalidByteSse);
246+
RunUtf8ValidationBenchmark(allLinesUtf8, (byte* pInputBuffer, int inputLength) =>
247+
{
248+
int dummyUtf16CodeUnitCountAdjustment, dummyScalarCountAdjustment;
249+
// Call the method with additional out parameters within the lambda.
250+
// You must handle these additional out parameters inside the lambda, as they cannot be passed back through the delegate.
251+
return SimdUnicode.UTF8.GetPointerToFirstInvalidByteAvx2(pInputBuffer, inputLength, out dummyUtf16CodeUnitCountAdjustment, out dummyScalarCountAdjustment);
252+
});
242253
}
243254
}
244-
/*
245-
// TODO: enable this benchmark when the AVX-512 implementation is ready
255+
246256
[Benchmark]
247-
[BenchmarkCategory("avx512")]
248-
public unsafe void SIMDUtf8ValidationRealDataAvx512()
257+
[BenchmarkCategory("sse")]
258+
public unsafe void SIMDUtf8ValidationRealDataSse()
249259
{
250260
if (allLinesUtf8 != null)
251261
{
252-
RunUtf8ValidationBenchmark(allLinesUtf8, SimdUnicode.UTF8.GetPointerToFirstInvalidByteAvx512);
262+
RunUtf8ValidationBenchmark(allLinesUtf8, SimdUnicode.UTF8.GetPointerToFirstInvalidByteSse);
253263
}
254-
}*/
264+
}
255265

256266
}
257267
public class Program

src/Ascii.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ public unsafe static class Ascii
2525

2626
public static bool IsAscii(this string s)
2727
{
28+
if (s == null) return true;
2829
foreach (var c in s)
2930
{
3031
if (!c.IsAscii()) return false;

0 commit comments

Comments
 (0)