Skip to content

Commit 6f34ead

Browse files
authored
Merge pull request #26 from simdutf/avx2_new_algo_and_tests
Avx2 new algo and tests
2 parents e687f04 + 0595946 commit 6f34ead

File tree

10 files changed

+1771
-288
lines changed

10 files changed

+1771
-288
lines changed

README.md

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,12 @@ We recommend you install .NET 8: https://dotnet.microsoft.com/en-us/download/dot
2828
dotnet test
2929
```
3030

31+
To see which tests are running, we recommend setting the verbosity level to detailed (`-v d`):
32+
33+
```
34+
dotnet test -v d
35+
```
36+
3137
To get a list of available tests, enter the command:
3238

3339
```
@@ -36,10 +42,19 @@ dotnet test --list-tests
3642

3743
To run specific tests, it is helpful to use the filter parameter:
3844

45+
3946
```
40-
dotnet test -c Release --filter Ascii
47+
dotnet test --filter TooShortErrorAVX
4148
```
4249

50+
Or to target specific categories:
51+
52+
```
53+
dotnet test --filter "Category=scalar"
54+
```
55+
56+
57+
4358
## Running Benchmarks
4459

4560
To run the benchmarks, run the following command:

benchmark/Benchmark.cs

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -183,19 +183,37 @@ public unsafe void SIMDUtf8ValidationRealData()
183183
{
184184
if (allLinesUtf8 != null)
185185
{
186-
RunUtf8ValidationBenchmark(allLinesUtf8, SimdUnicode.UTF8.GetPointerToFirstInvalidByte);
186+
// RunUtf8ValidationBenchmark(allLinesUtf8, SimdUnicode.UTF8.GetPointerToFirstInvalidByte);
187187
}
188188
}
189189

190190
[Benchmark]
191+
// [BenchmarkCategory("scalar")]
192+
// public unsafe void Utf8ValidationRealDataScalar()
193+
// {
194+
// if (allLinesUtf8 != null)
195+
// {
196+
// RunUtf8ValidationBenchmark(allLinesUtf8, SimdUnicode.UTF8.GetPointerToFirstInvalidByteScalar);
197+
// }
198+
// }
199+
191200
[BenchmarkCategory("scalar")]
192201
public unsafe void Utf8ValidationRealDataScalar()
193202
{
194203
if (allLinesUtf8 != null)
195204
{
196-
RunUtf8ValidationBenchmark(allLinesUtf8, SimdUnicode.UTF8.GetPointerToFirstInvalidByteScalar);
205+
// Assuming allLinesUtf8 is a byte* and its length is provided by another variable, for example, allLinesUtf8Length
206+
RunUtf8ValidationBenchmark(allLinesUtf8, (byte* pInputBuffer, int inputLength) =>
207+
{
208+
int dummyUtf16CodeUnitCountAdjustment, dummyScalarCountAdjustment;
209+
// Call the method with additional out parameters within the lambda.
210+
// The benchmark delegate only takes (pointer, length), so the two extra out values are received in the dummy locals above and discarded.
211+
return SimdUnicode.UTF8.GetPointerToFirstInvalidByteScalar(pInputBuffer, inputLength, out dummyUtf16CodeUnitCountAdjustment, out dummyScalarCountAdjustment);
212+
});
197213
}
198214
}
215+
216+
199217
[Benchmark]
200218
[BenchmarkCategory("arm64")]
201219
public unsafe void SIMDUtf8ValidationRealDataArm64()
@@ -205,15 +223,15 @@ public unsafe void SIMDUtf8ValidationRealDataArm64()
205223
RunUtf8ValidationBenchmark(allLinesUtf8, SimdUnicode.UTF8.GetPointerToFirstInvalidByteArm64);
206224
}
207225
}
208-
[Benchmark]
209-
[BenchmarkCategory("avx")]
210-
public unsafe void SIMDUtf8ValidationRealDataAvx2()
211-
{
212-
if (allLinesUtf8 != null)
213-
{
214-
RunUtf8ValidationBenchmark(allLinesUtf8, SimdUnicode.UTF8.GetPointerToFirstInvalidByteAvx2);
215-
}
216-
}
226+
// [Benchmark]
227+
// [BenchmarkCategory("avx")]
228+
// public unsafe void SIMDUtf8ValidationRealDataAvx2()
229+
// {
230+
// if (allLinesUtf8 != null)
231+
// {
232+
// RunUtf8ValidationBenchmark(allLinesUtf8, SimdUnicode.UTF8.GetPointerToFirstInvalidByteAvx2);
233+
// }
234+
// }
217235
[Benchmark]
218236
[BenchmarkCategory("sse")]
219237
public unsafe void SIMDUtf8ValidationRealDataSse()

benchmark/UTF8_runtime.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
namespace DotnetRuntime
2929
{
3030

31-
internal static unsafe partial class Utf8Utility
31+
public static unsafe partial class Utf8Utility
3232
{
3333
/// <summary>
3434
/// Returns <see langword="true"/> iff the low byte of <paramref name="value"/>
@@ -500,6 +500,7 @@ private static bool UInt32EndsWithValidUtf8TwoByteSequenceLittleEndian(uint valu
500500
// the value isn't overlong using a single comparison. On big-endian platforms, we'll need
501501
// to validate the mask and validate that the sequence isn't overlong as two separate comparisons.
502502

503+
// Temp16 - 2
503504
if ((BitConverter.IsLittleEndian && UInt32EndsWithValidUtf8TwoByteSequenceLittleEndian(thisDWord))
504505
|| (!BitConverter.IsLittleEndian && (UInt32EndsWithUtf8TwoByteMask(thisDWord) && !UInt32EndsWithOverlongUtf8TwoByteSequence(thisDWord))))
505506
{

benchmark/benchmark.csproj

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414

1515
<ItemGroup>
1616
<ProjectReference Include="..\src\SimdUnicode.csproj" />
17-
<ProjectReference Include="..\test\tests.csproj" />
1817
</ItemGroup>
1918

2019
<ItemGroup>

src/Ascii.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ public static unsafe nuint GetIndexOfFirstNonAsciiByte(byte* pBuffer, nuint buff
157157

158158
}
159159

160-
return GetIndexOfFirstNonAsciiByteScalar(pBuffer, bufferLength);
160+
return GetIndexOfFirstNonAsciiByteScalar(pBuffer, bufferLength);
161161
}
162162

163163

0 commit comments

Comments
 (0)