Skip to content

Commit e2930d4

Browse files
committed
Added Memory Diagnoser + Aggressive inlining
1 parent 2d3b6b1 commit e2930d4

File tree

3 files changed

+28
-3
lines changed

3 files changed

+28
-3
lines changed

benchmark/Benchmark.cs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,9 @@ protected void IntroduceError(byte[] utf8, Random random)
104104

105105
}
106106

107+
108+
[MemoryDiagnoser]
109+
107110
public class SyntheticBenchmark : BenchmarkBase
108111
{
109112

@@ -403,6 +406,9 @@ public void SIMDUtf8ValidationErrorUtf8()
403406

404407
}
405408

409+
410+
[MemoryDiagnoser]
411+
406412
public class RealDataBenchmark : BenchmarkBase
407413
{
408414

src/Ascii.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ public static unsafe bool SIMDIsAscii(this ReadOnlySpan<char> s)
134134
return true;
135135
}
136136

137+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
137138
public static unsafe nuint GetIndexOfFirstNonAsciiByte(byte* pBuffer, nuint bufferLength)
138139
{
139140
byte* buf_orig = pBuffer;

src/UTF8_validation.cs

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
using System.Runtime.Intrinsics;
33
using System.Runtime.Intrinsics.X86;
44
using System.Linq;
5+
using System.Runtime.CompilerServices;
56

67

78
// C# already has something that is *more or less* equivalent to our C++ simd class:
@@ -11,7 +12,8 @@
1112
public static class Vector256Extensions
1213
{
1314
// Takes the second lane of the current vector and the first lane of the previous vector, shifts the result right by an appropriate number of bytes (fewer than 16, i.e. fewer than 128 bits), and returns it.
14-
// Checked
15+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
16+
1517
public static Vector256<byte> Prev(this Vector256<byte> current, Vector256<byte> prev, int N = 1)
1618
{
1719

@@ -85,6 +87,7 @@ public static unsafe class Utf8Utility
8587

8688

8789
// Returns a pointer to the first invalid byte in the input buffer if it's invalid, or a pointer to the end if it's valid.
90+
// [MethodImpl(MethodImplOptions.AggressiveInlining)]
8891
public static byte* GetPointerToFirstInvalidByte(byte* pInputBuffer, int inputLength)
8992
{
9093
if (pInputBuffer == null || inputLength <= 0)
@@ -148,6 +151,7 @@ public utf8_checker()
148151
// The original C++ implementation is much more extensive and assumes a 512 bit stream as well as several implementations
149152
// In this case I focus solely on AVX2 instructions for prototyping and benchmarking purposes.
150153
// This is the simplest, least time-consuming implementation.
154+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
151155

152156
public void check_next_input(Vector256<byte> input)
153157
{
@@ -159,7 +163,6 @@ public void check_next_input(Vector256<byte> input)
159163
// Contains non-ASCII characters, process the vector
160164
check_utf8_bytes(input, prev_input_block);
161165
prev_incomplete = is_incomplete(input);
162-
163166
}
164167

165168

@@ -170,7 +173,8 @@ public void check_next_input(Vector256<byte> input)
170173

171174
}
172175

173-
// Checked
176+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
177+
174178
public void check_utf8_bytes(Vector256<byte> input, Vector256<byte> prev_input)
175179
{
176180
Vector256<byte> prev1 = input.Prev(prev_input, 1);
@@ -185,6 +189,7 @@ public void check_utf8_bytes(Vector256<byte> input, Vector256<byte> prev_input)
185189

186190
}
187191

192+
// [MethodImpl(MethodImplOptions.AggressiveInlining)]
188193

189194
public bool errors()
190195
{
@@ -193,6 +198,8 @@ public bool errors()
193198
return !Avx2.TestZ(error, error);
194199
}
195200

201+
// [MethodImpl(MethodImplOptions.AggressiveInlining)]
202+
196203
public void check_eof()
197204
{
198205
// Console.WriteLine("Error Vector before check_eof(): " + VectorToString(error));
@@ -203,6 +210,8 @@ public void check_eof()
203210

204211
}
205212

213+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
214+
206215
// This corresponds to section 6.1 of the paper (see e.g. Table 6), e.g. the 1-2 byte cases
207216
private Vector256<byte> check_special_cases(Vector256<byte> input, Vector256<byte> prev1)
208217
{
@@ -267,6 +276,8 @@ private Vector256<byte> check_special_cases(Vector256<byte> input, Vector256<byt
267276
return Avx2.And(Avx2.And(byte_1_high, byte_1_low), byte_2_high);
268277
}
269278

279+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
280+
270281
private Vector256<byte> check_multibyte_lengths(Vector256<byte> input, Vector256<byte> prev_input, Vector256<byte> sc)
271282
{
272283
// Console.WriteLine("sc: " + VectorToString(sc));
@@ -289,6 +300,8 @@ private Vector256<byte> check_multibyte_lengths(Vector256<byte> input, Vector256
289300
return Avx2.Xor(must23_80, sc);
290301
}
291302

303+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
304+
292305
private Vector256<byte> must_be_2_3_continuation(Vector256<byte> prev2, Vector256<byte> prev3)
293306
{
294307
Vector256<byte> is_third_byte = Avx2.SubtractSaturate(prev2, Vector256.Create((byte)(0b11100000u - 1)));
@@ -303,6 +316,8 @@ private Vector256<byte> must_be_2_3_continuation(Vector256<byte> prev2, Vector25
303316
return comparisonResult.AsByte();
304317
}
305318

319+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
320+
306321
private Vector256<byte> is_incomplete(Vector256<byte> input)
307322
{
308323
// Console.WriteLine("Input Vector is_incomplete: " + VectorToString(input));
@@ -321,6 +336,8 @@ private Vector256<byte> is_incomplete(Vector256<byte> input)
321336
return result;
322337
}
323338

339+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
340+
324341
private Vector256<byte> SaturatingSubtractUnsigned(Vector256<byte> left, Vector256<byte> right)
325342
{
326343
if (!Avx2.IsSupported)
@@ -336,6 +353,7 @@ private Vector256<byte> SaturatingSubtractUnsigned(Vector256<byte> left, Vector2
336353
return subtractionResult.AsByte();
337354
}
338355

356+
339357
// Helper functions for debugging
340358
private string VectorToString(Vector256<byte> vector)
341359
{

0 commit comments

Comments
 (0)