Skip to content

Commit f90ef10

Browse files
authored
Merge pull request #21 from simdutf/runtime_dispatch
Runtime dispatch
2 parents ee9ee16 + 90b8a60 commit f90ef10

File tree

10 files changed

+352
-441
lines changed

10 files changed

+352
-441
lines changed

README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,13 @@ cd benchmark
4747
dotnet run -c Release
4848
```
4949

50+
To run just one benchmark, use a filter:
51+
52+
```
53+
cd benchmark
54+
dotnet run --configuration Release --filter "*Arabic-Lipsum*"
55+
```
56+
5057
If you are on macOS or Linux, you may want to run the benchmarks in privileged mode:
5158

5259
```

benchmark/Benchmark.cs

Lines changed: 98 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
using BenchmarkDotNet.Running;
55
using BenchmarkDotNet.Configs;
66
using BenchmarkDotNet.Reports;
7+
using BenchmarkDotNet.Filters;
78
using System.Text;
89
using System.Runtime;
910
using System.Runtime.InteropServices;
@@ -12,7 +13,10 @@
1213
using System.Collections.Generic;
1314
using System.Linq;
1415
using BenchmarkDotNet.Columns;
15-
16+
using System.Runtime.Intrinsics;
17+
using System.Runtime.Intrinsics.X86;
18+
using System.Runtime.Intrinsics.Arm;
19+
using System.Runtime.CompilerServices;
1620

1721

1822
namespace SimdUnicodeBenchmarks
@@ -47,6 +51,8 @@ public string GetValue(Summary summary, BenchmarkCase benchmarkCase)
4751
public UnitType UnitType { get; } = UnitType.Dimensionless;
4852
public string Legend { get; } = "The speed in gigabytes per second";
4953
}
54+
55+
5056
[SimpleJob(launchCount: 1, warmupCount: 3, iterationCount: 3)]
5157
[Config(typeof(Config))]
5258
public class RealDataBenchmark
@@ -56,7 +62,44 @@ private class Config : ManualConfig
5662
{
5763
public Config()
5864
{
59-
AddColumn(new Speed());
65+
AddColumn(new Speed());
66+
67+
68+
if (RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
69+
{
70+
Console.WriteLine("ARM64 system detected.");
71+
AddFilter(new AnyCategoriesFilter(["arm64", "scalar", "runtime"]));
72+
73+
}
74+
else if (RuntimeInformation.ProcessArchitecture == Architecture.X64)
75+
{
76+
if (Vector512.IsHardwareAccelerated && System.Runtime.Intrinsics.X86.Avx512Vbmi.IsSupported)
77+
{
78+
Console.WriteLine("X64 system detected (Intel, AMD,...) with AVX-512 support.");
79+
AddFilter(new AnyCategoriesFilter(["avx512", "avx", "sse", "scalar", "runtime"]));
80+
}
81+
else if (Avx2.IsSupported)
82+
{
83+
Console.WriteLine("X64 system detected (Intel, AMD,...) with AVX2 support.");
84+
AddFilter(new AnyCategoriesFilter(["avx", "sse", "scalar", "runtime"]));
85+
}
86+
else if (Sse42.IsSupported)
87+
{
88+
Console.WriteLine("X64 system detected (Intel, AMD,...) with Sse4.2 support.");
89+
AddFilter(new AnyCategoriesFilter(["sse", "scalar", "runtime"]));
90+
}
91+
else
92+
{
93+
Console.WriteLine("X64 system detected (Intel, AMD,...) without relevant SIMD support.");
94+
AddFilter(new AnyCategoriesFilter(["scalar", "runtime"]));
95+
}
96+
}
97+
else
98+
{
99+
AddFilter(new AnyCategoriesFilter(["scalar", "runtime"]));
100+
101+
}
102+
60103
}
61104
}
62105
// Parameters and variables for real data
@@ -128,42 +171,83 @@ public void Setup()
128171
}
129172

130173
[Benchmark]
174+
[BenchmarkCategory("default", "runtime")]
131175
public unsafe void DotnetRuntimeUtf8ValidationRealData()
132176
{
133177
RunDotnetRuntimeUtf8ValidationBenchmark(allLinesUtf8, DotnetRuntime.Utf8Utility.GetPointerToFirstInvalidByte);
134178
}
135179

136180
[Benchmark]
181+
[BenchmarkCategory("default")]
137182
public unsafe void SIMDUtf8ValidationRealData()
138183
{
139184
if (allLinesUtf8 != null)
140185
{
141186
RunUtf8ValidationBenchmark(allLinesUtf8, SimdUnicode.UTF8.GetPointerToFirstInvalidByte);
142187
}
143188
}
144-
}
145189

146-
public class Program
147-
{
148-
// TODO: adopt BenchmarkSwitcher https://benchmarkdotnet.org/articles/guides/how-to-run.html
149-
public static void Main(string[] args)
190+
[Benchmark]
191+
[BenchmarkCategory("scalar")]
192+
public unsafe void Utf8ValidationRealDataScalar()
193+
{
194+
if (allLinesUtf8 != null)
195+
{
196+
RunUtf8ValidationBenchmark(allLinesUtf8, SimdUnicode.UTF8.GetPointerToFirstInvalidByteScalar);
197+
}
198+
}
199+
[Benchmark]
200+
[BenchmarkCategory("arm64")]
201+
public unsafe void SIMDUtf8ValidationRealDataArm64()
202+
{
203+
if (allLinesUtf8 != null)
204+
{
205+
RunUtf8ValidationBenchmark(allLinesUtf8, SimdUnicode.UTF8.GetPointerToFirstInvalidByteArm64);
206+
}
207+
}
208+
[Benchmark]
209+
[BenchmarkCategory("avx")]
210+
public unsafe void SIMDUtf8ValidationRealDataAvx2()
150211
{
151-
if (RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
212+
if (allLinesUtf8 != null)
152213
{
153-
Console.WriteLine("ARM64 system detected.");
214+
RunUtf8ValidationBenchmark(allLinesUtf8, SimdUnicode.UTF8.GetPointerToFirstInvalidByteAvx2);
154215
}
155-
else if (RuntimeInformation.ProcessArchitecture == Architecture.X64)
216+
}
217+
/*
218+
// TODO: enable this benchmark when the SSE implementation is ready
219+
[Benchmark]
220+
[BenchmarkCategory("sse")]
221+
public unsafe void SIMDUtf8ValidationRealDataSse()
222+
{
223+
if (allLinesUtf8 != null)
156224
{
157-
Console.WriteLine("X64 system detected (Intel, AMD,...).");
225+
RunUtf8ValidationBenchmark(allLinesUtf8, SimdUnicode.UTF8.GetPointerToFirstInvalidByteSse);
158226
}
159-
else
227+
}*/
228+
/*
229+
// TODO: enable this benchmark when the AVX-512 implementation is ready
230+
[Benchmark]
231+
[BenchmarkCategory("avx512")]
232+
public unsafe void SIMDUtf8ValidationRealDataAvx512()
233+
{
234+
if (allLinesUtf8 != null)
160235
{
161-
Console.WriteLine("Unrecognized system.");
236+
RunUtf8ValidationBenchmark(allLinesUtf8, SimdUnicode.UTF8.GetPointerToFirstInvalidByteAvx512);
162237
}
238+
}*/
163239

240+
}
241+
public class Program
242+
{
243+
// TODO: adopt BenchmarkSwitcher https://benchmarkdotnet.org/articles/guides/how-to-run.html
244+
/*public static void Main(string[] args)
245+
{
164246
var config = DefaultConfig.Instance.WithSummaryStyle(SummaryStyle.Default.WithMaxParameterColumnWidth(100));
165247
BenchmarkRunner.Run<RealDataBenchmark>(config);
166-
}
248+
}*/
249+
static void Main(string[] args) => BenchmarkSwitcher.FromAssembly(typeof(Program).Assembly).Run(args);
250+
167251

168252
}
169253

src/Ascii.cs

Lines changed: 76 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -136,83 +136,111 @@ public static unsafe bool SIMDIsAscii(this ReadOnlySpan<char> s)
136136

137137
[MethodImpl(MethodImplOptions.AggressiveInlining)]
138138
public static unsafe nuint GetIndexOfFirstNonAsciiByte(byte* pBuffer, nuint bufferLength)
139+
{
140+
if (AdvSimd.Arm64.IsSupported)
141+
{
142+
return GetIndexOfFirstNonAsciiByteArm64(pBuffer, bufferLength);
143+
}
144+
// TODO: Add support for other architectures
145+
/*if (Vector512.IsHardwareAccelerated && Avx512Vbmi2.IsSupported)
146+
{
147+
return GetIndexOfFirstNonAsciiByteAvx512(pBuffer, bufferLength);
148+
}*/
149+
if (Avx2.IsSupported)
150+
{
151+
return GetIndexOfFirstNonAsciiByteAvx2(pBuffer, bufferLength);
152+
}
153+
154+
if (Sse2.IsSupported)
155+
{
156+
return GetIndexOfFirstNonAsciiByteSse2(pBuffer, bufferLength);
157+
158+
}
159+
160+
return GetIndexOfFirstNonAsciiByteScalar(pBuffer, bufferLength);
161+
}
162+
163+
164+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
165+
public static unsafe nuint GetIndexOfFirstNonAsciiByteArm64(byte* pBuffer, nuint bufferLength)
139166
{
140167
byte* buf_orig = pBuffer;
141168
byte* end = pBuffer + bufferLength;
142-
Vector256<sbyte> ascii = Vector256<sbyte>.Zero;
143169

144-
if (Vector256.IsHardwareAccelerated)
170+
for (; pBuffer + 16 <= end; pBuffer += 16)
145171
{
146-
for (; pBuffer + 32 <= end; pBuffer += 32)
172+
Vector128<byte> input = AdvSimd.LoadVector128(pBuffer);
173+
if (AdvSimd.Arm64.MaxAcross(input).ToScalar() > 127)
147174
{
148-
Vector256<sbyte> input = Avx.LoadVector256((sbyte*)pBuffer);
149-
int notascii = Avx2.MoveMask(input.AsByte());
150-
if (notascii != 0)
151-
{
152-
// Print a message for debugging
153-
// Console.WriteLine($"Non-ASCII character found. notascii: {notascii}, index: {(nuint)(pBuffer - buf_orig) + (nuint)BitOperations.TrailingZeroCount(notascii)}");
154-
155-
return (nuint)(pBuffer - buf_orig) + (nuint)BitOperations.TrailingZeroCount(notascii);
156-
}
175+
return (nuint)(pBuffer - buf_orig) + GetIndexOfFirstNonAsciiByteScalar(pBuffer, (nuint)(end - pBuffer));
157176
}
158177
}
159178

160-
if (Vector128.IsHardwareAccelerated)
179+
180+
// Call the scalar function for the remaining bytes
181+
nuint scalarResult = GetIndexOfFirstNonAsciiByteScalar(pBuffer, (nuint)(end - pBuffer));
182+
183+
// Add the number of bytes processed by SIMD
184+
return (nuint)(pBuffer - buf_orig) + scalarResult;
185+
186+
}
187+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
188+
public static unsafe nuint GetIndexOfFirstNonAsciiByteSse2(byte* pBuffer, nuint bufferLength)
189+
{
190+
byte* buf_orig = pBuffer;
191+
byte* end = pBuffer + bufferLength;
192+
193+
for (; pBuffer + 16 <= end; pBuffer += 16)
161194
{
162-
for (; pBuffer + 16 <= end; pBuffer += 16)
195+
Vector128<sbyte> input = Sse2.LoadVector128((sbyte*)pBuffer);
196+
int notascii = Sse2.MoveMask(input.AsByte());
197+
if (notascii != 0)
163198
{
164-
Vector128<sbyte> input = Sse2.LoadVector128((sbyte*)pBuffer);
165-
int notascii = Sse2.MoveMask(input.AsByte());
166-
if (notascii != 0)
167-
{
168-
// Print a message for debugging
169-
// Console.WriteLine($"Non-ASCII character found. notascii: {notascii}, index: {(nuint)(pBuffer - buf_orig) + (nuint)BitOperations.TrailingZeroCount(notascii)}");
170-
171-
return (nuint)(pBuffer - buf_orig) + (nuint)BitOperations.TrailingZeroCount(notascii);
172-
}
199+
return (nuint)(pBuffer - buf_orig) + (nuint)BitOperations.TrailingZeroCount(notascii);
173200
}
174201
}
175202

176-
177203
// Call the scalar function for the remaining bytes
178-
nuint scalarResult = Scalar_GetIndexOfFirstNonAsciiByte(pBuffer, (nuint)(end - pBuffer));
204+
nuint scalarResult = GetIndexOfFirstNonAsciiByteScalar(pBuffer, (nuint)(end - pBuffer));
179205

180206
// Add the number of bytes processed by SIMD
181207
return (nuint)(pBuffer - buf_orig) + scalarResult;
182208

183209
}
184210

211+
185212
[MethodImpl(MethodImplOptions.AggressiveInlining)]
186-
public static unsafe nuint Scalar_GetIndexOfFirstNonAsciiByte(byte* pBuffer, nuint bufferLength)
213+
public static unsafe nuint GetIndexOfFirstNonAsciiByteAvx2(byte* pBuffer, nuint bufferLength)
187214
{
188-
byte* pCurrent = pBuffer;
189-
byte* pBufferEnd = pBuffer + bufferLength;
215+
byte* buf_orig = pBuffer;
216+
byte* end = pBuffer + bufferLength;
190217

191-
if (!Vector128.IsHardwareAccelerated)
218+
for (; pBuffer + 32 <= end; pBuffer += 32)
192219
{
193-
194-
// Process in blocks of 16 bytes when possible
195-
while (pCurrent + 16 <= pBufferEnd)
220+
Vector256<sbyte> input = Avx.LoadVector256((sbyte*)pBuffer);
221+
int notascii = Avx2.MoveMask(input.AsByte());
222+
if (notascii != 0)
196223
{
197-
ulong v1 = *(ulong*)pCurrent;
198-
ulong v2 = *(ulong*)(pCurrent + 8);
199-
ulong v = v1 | v2;
200-
201-
if ((v & 0x8080808080808080) != 0)
202-
{
203-
for (; pCurrent < pBufferEnd; pCurrent++)
204-
{
205-
if (*pCurrent >= 0b10000000)
206-
{
207-
return (nuint)(pCurrent - pBuffer);
208-
}
209-
}
210-
}
211-
212-
pCurrent += 16;
224+
return (nuint)(pBuffer - buf_orig) + (nuint)BitOperations.TrailingZeroCount(notascii);
213225
}
214226
}
215227

228+
229+
230+
// Call the scalar function for the remaining bytes
231+
nuint scalarResult = GetIndexOfFirstNonAsciiByteScalar(pBuffer, (nuint)(end - pBuffer));
232+
233+
// Add the number of bytes processed by SIMD
234+
return (nuint)(pBuffer - buf_orig) + scalarResult;
235+
236+
}
237+
238+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
239+
public static unsafe nuint GetIndexOfFirstNonAsciiByteScalar(byte* pBuffer, nuint bufferLength)
240+
{
241+
byte* pCurrent = pBuffer;
242+
byte* pBufferEnd = pBuffer + bufferLength;
243+
216244
// Process the tail byte-by-byte
217245
for (; pCurrent < pBufferEnd; pCurrent++)
218246
{

src/UTF16.cs

Lines changed: 0 additions & 9 deletions
This file was deleted.

0 commit comments

Comments
 (0)