Skip to content

Commit c9a60bd

Browse files
committed
small optimization
1 parent a32542b commit c9a60bd

File tree

4 files changed

+48
-10
lines changed

4 files changed

+48
-10
lines changed

src/SimdUnicode.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
<PropertyGroup>
44
<OutputType>Library</OutputType>
5-
<TargetFramework>net7.0</TargetFramework>
5+
<TargetFramework>net8.0</TargetFramework>
66
<Nullable>enable</Nullable>
77
<!-- Required for the SIMD/SSE code, which uses unsafe blocks. See "How to run unsafe code in Visual Studio Code?" on Stack Overflow: https://stackoverflow.com/questions/50636693/how-to-run-unsafe-code-in-visual-studio-code -->
88
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>

src/UTF8_validation.cs

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,12 @@
88
// Vector256 https://learn.microsoft.com/en-us/dotnet/api/system.runtime.intrinsics.vector256-1?view=net-7.0
99
// I extend it as needed
1010

11+
12+
// | Method | N | Mean | Error | StdDev | Gen0 | Allocated |
13+
// |---------------------------- |----- |-----------:|----------:|----------:|-------:|----------:|
14+
// | SIMDUtf8ValidationValidUtf8 | 100 | 165.8 us | 2.87 us | 2.55 us | 0.4883 | 54.69 KB |
15+
// | SIMDUtf8ValidationValidUtf8 | 8000 | 8,733.5 us | 167.05 us | 211.27 us | - | 33.21 KB |
16+
1117
public static class Vector256Extensions
1218
{
1319
// Gets the second lane of the current vector and the first lane of the previous vector, shifts the result right by an appropriate number of bytes (less than 16, i.e. less than 128 bits), and returns it.
@@ -177,7 +183,7 @@ public static unsafe class Utf8Utility
177183
}
178184

179185
// The C# docs suggest that classes are allocated on the heap:
180-
// it doesnt seem to do much in this case but I tthought the suggestion to be sensible.
186+
// it doesnt seem to do much in this case but I thought the suggestion to be sensible.
181187
public struct utf8_validation
182188
{
183189
public struct utf8_checker
@@ -359,12 +365,14 @@ private Vector256<byte> must_be_2_3_continuation(Vector256<byte> prev2, Vector25
359365
Vector256<byte> is_fourth_byte = Avx2.SubtractSaturate(prev3, Vector256.Create((byte)(0b11110000u - 1)));
360366

361367
Vector256<byte> combined = Avx2.Or(is_third_byte, is_fourth_byte);
368+
return combined;
369+
370+
// Vector256<sbyte> signedCombined = combined.AsSByte();
362371

363-
Vector256<sbyte> signedCombined = combined.AsSByte();
364-
Vector256<sbyte> zero = Vector256<sbyte>.Zero;
365-
Vector256<sbyte> comparisonResult = Avx2.CompareGreaterThan(signedCombined, zero);
372+
// Vector256<sbyte> zero = Vector256<sbyte>.Zero;
373+
// Vector256<sbyte> comparisonResult = Avx2.CompareGreaterThan(signedCombined, zero);
366374

367-
return comparisonResult.AsByte();
375+
// return comparisonResult.AsByte();
368376
}
369377

370378

@@ -377,6 +385,13 @@ private Vector256<byte> must_be_2_3_continuation(Vector256<byte> prev2, Vector25
377385
};
378386
Vector256<byte> maxValue = Vector256.Create(MaxArray);
379387

388+
// private static readonly Vector256<byte> maxValue = Vector256.Create(
389+
// 255, 255, 255, 255, 255, 255, 255, 255,
390+
// 255, 255, 255, 255, 255, 255, 255, 255,
391+
// 255, 255, 255, 255, 255, 255, 255, 255,
392+
// 255, 255, 255, 255, 255, 0b11110000 - 1, 0b11100000 - 1, 0b11000000 - 1);
393+
394+
380395
[MethodImpl(MethodImplOptions.AggressiveInlining)]
381396

382397
private Vector256<byte> is_incomplete(Vector256<byte> input)

src/helpers.cs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,5 +28,30 @@ public static Vector256<byte> CompareGreaterThan(Vector256<byte> left, Vector256
2828

2929
return result;
3030
}
31+
32+
private static readonly int[] previousCollectionCounts = new int[GC.MaxGeneration + 1];
33+
34+
public static void CheckForGCCollections(string sectionName)
35+
{
36+
bool collectionOccurred = false;
37+
38+
for (int i = 0; i <= GC.MaxGeneration; i++)
39+
{
40+
int currentCount = GC.CollectionCount(i);
41+
if (currentCount != previousCollectionCounts[i])
42+
{
43+
Console.WriteLine($"GC occurred in generation {i} during '{sectionName}'. Collections: {currentCount - previousCollectionCounts[i]}");
44+
previousCollectionCounts[i] = currentCount;
45+
collectionOccurred = true;
46+
}
47+
}
48+
49+
if (!collectionOccurred)
50+
{
51+
Console.WriteLine($"No GC occurred during '{sectionName}'.");
52+
}
53+
}
3154
}
55+
56+
3257
}

test/AsciiTest.cs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@ namespace tests;
22
using System.Text;
33
using SimdUnicode;
44

5-
//TODO (Nick Nuon): Test UTF8 Generator works correctly
6-
75
public class AsciiTest
86
{
97
[Fact]
@@ -140,7 +138,7 @@ public void TestNoErrorGetIndexOfFirstNonAsciiByte()
140138
{
141139
fixed (byte* pAscii = ascii)
142140
{
143-
nuint result = Ascii.GetIndexOfFirstNonAsciiByte(pAscii, (nuint)ascii.Length);
141+
nuint result = SimdUnicode.Ascii.GetIndexOfFirstNonAsciiByte(pAscii, (nuint)ascii.Length);
144142
if (result != (nuint)ascii.Length)
145143
{
146144
throw new Exception($"Unexpected non-ASCII character found at index {result}");
@@ -170,7 +168,7 @@ public void TestErrorGetIndexOfFirstNonAsciiByte()
170168
{
171169
fixed (byte* pAscii = ascii)
172170
{
173-
nuint result = Ascii.GetIndexOfFirstNonAsciiByte(pAscii, (nuint)ascii.Length);
171+
nuint result = SimdUnicode.Ascii.GetIndexOfFirstNonAsciiByte(pAscii, (nuint)ascii.Length);
174172
if (result != (nuint)i)
175173
{
176174
throw new Exception($"Expected non-ASCII character at index {i}, but found at index {result}");

0 commit comments

Comments
 (0)