Skip to content

Commit 5ce2ff6

Browse files
stephentoub authored and BrennanConroy committed
Replace custom ToUtf16 with Ascii.ToUtf16
1 parent eb626ad commit 5ce2ff6

File tree

3 files changed

+40
-270
lines changed

3 files changed

+40
-270
lines changed

src/Servers/Kestrel/Core/src/Internal/Http/HttpParser.cs

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -54,24 +54,28 @@ internal HttpParser(bool showErrorDetails, bool disableHttp1LineFeedTerminators)
5454
private const byte ByteQuestionMark = (byte)'?';
5555
private const byte BytePercentage = (byte)'%';
5656
private const int MinTlsRequestSize = 1; // We need at least 1 byte to check for a proper TLS request line
57+
private static ReadOnlySpan<byte> RequestLineDelimeters => new byte[] { ByteLF, 0 };
5758

5859
/// <summary>
5960
/// This API supports framework infrastructure and is not intended to be used
6061
/// directly from application code.
6162
/// </summary>
6263
public bool ParseRequestLine(TRequestHandler handler, ref SequenceReader<byte> reader)
6364
{
64-
if (reader.TryReadTo(out ReadOnlySpan<byte> requestLine, ByteLF, advancePastDelimiter: true))
65+
// Find the next delimiter.
66+
if (!reader.TryReadToAny(out ReadOnlySpan<byte> requestLine, RequestLineDelimeters, advancePastDelimiter: false))
6567
{
66-
ParseRequestLine(handler, requestLine);
67-
return true;
68+
return false;
6869
}
6970

70-
return false;
71-
}
71+
// Consume the delimiter.
72+
bool foundDelimiter = reader.TryRead(out var next);
73+
Debug.Assert(foundDelimiter);
74+
if (next == 0 || requestLine.Length == 0)
75+
{
76+
RejectRequestLine(requestLine);
77+
}
7278

73-
private void ParseRequestLine(TRequestHandler handler, ReadOnlySpan<byte> requestLine)
74-
{
7579
// Get Method and set the offset
7680
var method = requestLine.GetKnownMethod(out var methodEnd);
7781
if (method == HttpMethod.Custom)
@@ -175,6 +179,8 @@ private void ParseRequestLine(TRequestHandler handler, ReadOnlySpan<byte> reques
175179
// in-place normalization and decoding to transform into a canonical path
176180
var startLine = MemoryMarshal.CreateSpan(ref MemoryMarshal.GetReference(requestLine), queryEnd);
177181
handler.OnStartLine(versionAndMethod, path, startLine);
182+
183+
return true;
178184
}
179185

180186
/// <summary>

src/Servers/Kestrel/Core/src/Internal/Infrastructure/HttpUtilities.cs

Lines changed: 5 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
// Licensed to the .NET Foundation under one or more agreements.
22
// The .NET Foundation licenses this file to you under the MIT license.
33

4+
#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type
5+
46
using System.Buffers;
57
using System.Buffers.Binary;
68
using System.Diagnostics;
@@ -28,7 +30,6 @@ internal static partial class HttpUtilities
2830
private const ulong _http11VersionLong = 3543824036068086856; // GetAsciiStringAsLong("HTTP/1.1"); const results in better codegen
2931

3032
private static readonly UTF8Encoding DefaultRequestHeaderEncoding = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true);
31-
private static readonly SpanAction<char, IntPtr> s_getHeaderName = GetHeaderName;
3233

3334
[MethodImpl(MethodImplOptions.AggressiveInlining)]
3435
private static void SetKnownMethod(ulong mask, ulong knownMethodUlong, HttpMethod knownMethod, int length)
@@ -85,28 +86,13 @@ private static ulong GetMaskAsLong(ReadOnlySpan<byte> bytes)
8586
[MethodImpl(MethodImplOptions.AggressiveInlining)]
8687
public static unsafe string GetHeaderName(this ReadOnlySpan<byte> span)
8788
{
88-
if (span.IsEmpty)
89-
{
90-
return string.Empty;
91-
}
92-
93-
fixed (byte* source = &MemoryMarshal.GetReference(span))
94-
{
95-
return string.Create(span.Length, new IntPtr(source), s_getHeaderName);
96-
}
97-
}
98-
99-
private static unsafe void GetHeaderName(Span<char> buffer, IntPtr state)
100-
{
101-
fixed (char* output = &MemoryMarshal.GetReference(buffer))
89+
return string.Create(span.Length, (IntPtr)(&span), (destination, spanPtr) =>
10290
{
103-
// This version of AsciiUtilities returns null if there are any null (0 byte) characters
104-
// in the string
105-
if (!StringUtilities.TryGetAsciiString((byte*)state.ToPointer(), output, buffer.Length))
91+
if (Ascii.ToUtf16(*(ReadOnlySpan<byte>*)spanPtr, destination, out _) != OperationStatus.Done)
10692
{
10793
KestrelBadHttpRequestException.Throw(RequestRejectionReason.InvalidCharactersInHeaderName);
10894
}
109-
}
95+
});
11096
}
11197

11298
public static string GetAsciiStringNonNullCharacters(this Span<byte> span)

src/Shared/ServerInfrastructure/StringUtilities.cs

Lines changed: 22 additions & 244 deletions
Original file line numberDiff line numberDiff line change
@@ -11,83 +11,51 @@
1111
using System.Text;
1212

1313
#nullable enable
14+
#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type
1415

1516
namespace Microsoft.AspNetCore.Server.Kestrel.Core.Internal.Infrastructure;
1617

1718
internal static class StringUtilities
1819
{
20+
private static readonly SpanAction<char, IntPtr> s_getLatin1StringNonNullCharacters = GetLatin1StringNonNullCharacters;
21+
private static readonly SpanAction<char, (string? str, char separator, uint number)> s_populateSpanWithHexSuffix = PopulateSpanWithHexSuffix;
22+
1923
public static unsafe string GetAsciiOrUTF8StringNonNullCharacters(this ReadOnlySpan<byte> span, Encoding defaultEncoding)
2024
{
21-
if (span.IsEmpty)
25+
var resultString = string.Create(span.Length, (IntPtr)(&span), (destination, spanPtr) =>
2226
{
23-
return string.Empty;
24-
}
25-
26-
fixed (byte* source = &MemoryMarshal.GetReference(span))
27-
{
28-
var resultString = string.Create(span.Length, (IntPtr)source, s_getAsciiOrUTF8StringNonNullCharacters);
29-
30-
// If resultString is marked, perform UTF-8 encoding
31-
if (resultString[0] == '\0')
27+
if (Ascii.ToUtf16(*(ReadOnlySpan<byte>*)spanPtr, destination, out _) != OperationStatus.Done)
3228
{
33-
// null characters are considered invalid
34-
if (span.IndexOf((byte)0) != -1)
35-
{
36-
throw new InvalidOperationException();
37-
}
38-
39-
try
40-
{
41-
resultString = defaultEncoding.GetString(span);
42-
}
43-
catch (DecoderFallbackException)
44-
{
45-
throw new InvalidOperationException();
46-
}
29+
// Mark resultString for UTF-8 encoding
30+
destination[0] = '\0';
4731
}
32+
});
4833

49-
return resultString;
50-
}
51-
}
52-
53-
private static readonly unsafe SpanAction<char, IntPtr> s_getAsciiOrUTF8StringNonNullCharacters = (Span<char> buffer, IntPtr state) =>
54-
{
55-
fixed (char* output = &MemoryMarshal.GetReference(buffer))
34+
// If resultString is marked, perform UTF-8 encoding
35+
if (resultString[0] == '\0')
5636
{
57-
// This version of AsciiUtilities returns false if there are any null ('\0') or non-Ascii
58-
// character (> 127) in the string.
59-
if (!TryGetAsciiString((byte*)state.ToPointer(), output, buffer.Length))
37+
try
6038
{
61-
// Mark resultString for UTF-8 encoding
62-
output[0] = '\0';
39+
resultString = defaultEncoding.GetString(span);
40+
}
41+
catch (DecoderFallbackException)
42+
{
43+
throw new InvalidOperationException();
6344
}
64-
}
65-
};
66-
67-
public static unsafe string GetAsciiStringNonNullCharacters(this ReadOnlySpan<byte> span)
68-
{
69-
if (span.IsEmpty)
70-
{
71-
return string.Empty;
7245
}
7346

74-
fixed (byte* source = &MemoryMarshal.GetReference(span))
75-
{
76-
return string.Create(span.Length, (IntPtr)source, s_getAsciiStringNonNullCharacters);
77-
}
47+
return resultString;
7848
}
7949

80-
private static readonly unsafe SpanAction<char, IntPtr> s_getAsciiStringNonNullCharacters = (Span<char> buffer, IntPtr state) =>
50+
public static unsafe string GetAsciiStringNonNullCharacters(this ReadOnlySpan<byte> span)
8151
{
82-
fixed (char* output = &MemoryMarshal.GetReference(buffer))
52+
return string.Create(span.Length, (IntPtr)(&span), (destination, spanPtr) =>
8353
{
84-
// This version of AsciiUtilities returns false if there are any null ('\0') or non-Ascii
85-
// character (> 127) in the string.
86-
if (!TryGetAsciiString((byte*)state.ToPointer(), output, buffer.Length))
54+
if (Ascii.ToUtf16(*(ReadOnlySpan<byte>*)spanPtr, destination, out _) != OperationStatus.Done)
8755
{
8856
throw new InvalidOperationException();
8957
}
90-
}
58+
});
9159
};
9260

9361
public static unsafe string GetLatin1StringNonNullCharacters(this ReadOnlySpan<byte> span)
@@ -115,196 +83,6 @@ public static unsafe string GetLatin1StringNonNullCharacters(this ReadOnlySpan<b
11583
}
11684
};
11785

118-
[MethodImpl(MethodImplOptions.AggressiveOptimization)]
119-
public static unsafe bool TryGetAsciiString(byte* input, char* output, int count)
120-
{
121-
Debug.Assert(input != null);
122-
Debug.Assert(output != null);
123-
124-
var end = input + count;
125-
126-
Debug.Assert((long)end >= Vector256<sbyte>.Count);
127-
128-
// PERF: so the JIT can reuse the zero from a register
129-
var zero = Vector128<sbyte>.Zero;
130-
131-
if (Sse2.IsSupported)
132-
{
133-
if (Avx2.IsSupported && input <= end - Vector256<sbyte>.Count)
134-
{
135-
var avxZero = Vector256<sbyte>.Zero;
136-
137-
do
138-
{
139-
var vector = Avx.LoadVector256(input).AsSByte();
140-
if (!CheckBytesInAsciiRange(vector, avxZero))
141-
{
142-
return false;
143-
}
144-
145-
var tmp0 = Avx2.UnpackLow(vector, avxZero);
146-
var tmp1 = Avx2.UnpackHigh(vector, avxZero);
147-
148-
// Bring into the right order
149-
var out0 = Avx2.Permute2x128(tmp0, tmp1, 0x20);
150-
var out1 = Avx2.Permute2x128(tmp0, tmp1, 0x31);
151-
152-
Avx.Store((ushort*)output, out0.AsUInt16());
153-
Avx.Store((ushort*)output + Vector256<ushort>.Count, out1.AsUInt16());
154-
155-
input += Vector256<sbyte>.Count;
156-
output += Vector256<sbyte>.Count;
157-
} while (input <= end - Vector256<sbyte>.Count);
158-
159-
if (input == end)
160-
{
161-
return true;
162-
}
163-
}
164-
165-
if (input <= end - Vector128<sbyte>.Count)
166-
{
167-
do
168-
{
169-
var vector = Sse2.LoadVector128(input).AsSByte();
170-
if (!CheckBytesInAsciiRange(vector, zero))
171-
{
172-
return false;
173-
}
174-
175-
var c0 = Sse2.UnpackLow(vector, zero).AsUInt16();
176-
var c1 = Sse2.UnpackHigh(vector, zero).AsUInt16();
177-
178-
Sse2.Store((ushort*)output, c0);
179-
Sse2.Store((ushort*)output + Vector128<ushort>.Count, c1);
180-
181-
input += Vector128<sbyte>.Count;
182-
output += Vector128<sbyte>.Count;
183-
} while (input <= end - Vector128<sbyte>.Count);
184-
185-
if (input == end)
186-
{
187-
return true;
188-
}
189-
}
190-
}
191-
else if (Vector.IsHardwareAccelerated)
192-
{
193-
while (input <= end - Vector<sbyte>.Count)
194-
{
195-
var vector = Unsafe.AsRef<Vector<sbyte>>(input);
196-
if (!CheckBytesInAsciiRange(vector))
197-
{
198-
return false;
199-
}
200-
201-
Vector.Widen(
202-
vector,
203-
out Unsafe.AsRef<Vector<short>>(output),
204-
out Unsafe.AsRef<Vector<short>>(output + Vector<short>.Count));
205-
206-
input += Vector<sbyte>.Count;
207-
output += Vector<sbyte>.Count;
208-
}
209-
210-
if (input == end)
211-
{
212-
return true;
213-
}
214-
}
215-
216-
if (Environment.Is64BitProcess) // Use Intrinsic switch for branch elimination
217-
{
218-
// 64-bit: Loop longs by default
219-
while (input <= end - sizeof(long))
220-
{
221-
var value = *(long*)input;
222-
if (!CheckBytesInAsciiRange(value))
223-
{
224-
return false;
225-
}
226-
227-
// BMI2 could be used, but this variant is faster on both Intel and AMD.
228-
if (Sse2.X64.IsSupported)
229-
{
230-
var vecNarrow = Sse2.X64.ConvertScalarToVector128Int64(value).AsSByte();
231-
var vecWide = Sse2.UnpackLow(vecNarrow, zero).AsUInt64();
232-
Sse2.Store((ulong*)output, vecWide);
233-
}
234-
else
235-
{
236-
output[0] = (char)input[0];
237-
output[1] = (char)input[1];
238-
output[2] = (char)input[2];
239-
output[3] = (char)input[3];
240-
output[4] = (char)input[4];
241-
output[5] = (char)input[5];
242-
output[6] = (char)input[6];
243-
output[7] = (char)input[7];
244-
}
245-
246-
input += sizeof(long);
247-
output += sizeof(long);
248-
}
249-
250-
if (input <= end - sizeof(int))
251-
{
252-
var value = *(int*)input;
253-
if (!CheckBytesInAsciiRange(value))
254-
{
255-
return false;
256-
}
257-
258-
WidenFourAsciiBytesToUtf16AndWriteToBuffer(output, input, value, zero);
259-
260-
input += sizeof(int);
261-
output += sizeof(int);
262-
}
263-
}
264-
else
265-
{
266-
// 32-bit: Loop ints by default
267-
while (input <= end - sizeof(int))
268-
{
269-
var value = *(int*)input;
270-
if (!CheckBytesInAsciiRange(value))
271-
{
272-
return false;
273-
}
274-
275-
WidenFourAsciiBytesToUtf16AndWriteToBuffer(output, input, value, zero);
276-
277-
input += sizeof(int);
278-
output += sizeof(int);
279-
}
280-
}
281-
282-
if (input <= end - sizeof(short))
283-
{
284-
if (!CheckBytesInAsciiRange(((short*)input)[0]))
285-
{
286-
return false;
287-
}
288-
289-
output[0] = (char)input[0];
290-
output[1] = (char)input[1];
291-
292-
input += sizeof(short);
293-
output += sizeof(short);
294-
}
295-
296-
if (input < end)
297-
{
298-
if (!CheckBytesInAsciiRange(((sbyte*)input)[0]))
299-
{
300-
return false;
301-
}
302-
output[0] = (char)input[0];
303-
}
304-
305-
return true;
306-
}
307-
30886
[MethodImpl(MethodImplOptions.AggressiveOptimization)]
30987
public static unsafe bool TryGetLatin1String(byte* input, char* output, int count)
31088
{

0 commit comments

Comments
 (0)