|
11 | 11 | using System.Text;
|
12 | 12 |
|
13 | 13 | #nullable enable
|
| 14 | +#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type |
14 | 15 |
|
15 | 16 | namespace Microsoft.AspNetCore.Server.Kestrel.Core.Internal.Infrastructure;
|
16 | 17 |
|
17 | 18 | internal static class StringUtilities
|
18 | 19 | {
|
| 20 | + private static readonly SpanAction<char, IntPtr> s_getLatin1StringNonNullCharacters = GetLatin1StringNonNullCharacters; |
| 21 | + private static readonly SpanAction<char, (string? str, char separator, uint number)> s_populateSpanWithHexSuffix = PopulateSpanWithHexSuffix; |
| 22 | + |
19 | 23 | public static unsafe string GetAsciiOrUTF8StringNonNullCharacters(this ReadOnlySpan<byte> span, Encoding defaultEncoding)
|
20 | 24 | {
|
21 |
| - if (span.IsEmpty) |
| 25 | + var resultString = string.Create(span.Length, (IntPtr)(&span), (destination, spanPtr) => |
22 | 26 | {
|
23 |
| - return string.Empty; |
24 |
| - } |
25 |
| - |
26 |
| - fixed (byte* source = &MemoryMarshal.GetReference(span)) |
27 |
| - { |
28 |
| - var resultString = string.Create(span.Length, (IntPtr)source, s_getAsciiOrUTF8StringNonNullCharacters); |
29 |
| - |
30 |
| - // If resultString is marked, perform UTF-8 encoding |
31 |
| - if (resultString[0] == '\0') |
| 27 | + if (Ascii.ToUtf16(*(ReadOnlySpan<byte>*)spanPtr, destination, out _) != OperationStatus.Done) |
32 | 28 | {
|
33 |
| - // null characters are considered invalid |
34 |
| - if (span.IndexOf((byte)0) != -1) |
35 |
| - { |
36 |
| - throw new InvalidOperationException(); |
37 |
| - } |
38 |
| - |
39 |
| - try |
40 |
| - { |
41 |
| - resultString = defaultEncoding.GetString(span); |
42 |
| - } |
43 |
| - catch (DecoderFallbackException) |
44 |
| - { |
45 |
| - throw new InvalidOperationException(); |
46 |
| - } |
| 29 | + // Mark resultString for UTF-8 encoding |
| 30 | + destination[0] = '\0'; |
47 | 31 | }
|
| 32 | + }); |
48 | 33 |
|
49 |
| - return resultString; |
50 |
| - } |
51 |
| - } |
52 |
| - |
53 |
| - private static readonly unsafe SpanAction<char, IntPtr> s_getAsciiOrUTF8StringNonNullCharacters = (Span<char> buffer, IntPtr state) => |
54 |
| - { |
55 |
| - fixed (char* output = &MemoryMarshal.GetReference(buffer)) |
| 34 | + // If resultString is marked, perform UTF-8 encoding |
| 35 | + if (resultString[0] == '\0') |
56 | 36 | {
|
57 |
| - // This version of AsciiUtilities returns false if there are any null ('\0') or non-Ascii |
58 |
| - // character (> 127) in the string. |
59 |
| - if (!TryGetAsciiString((byte*)state.ToPointer(), output, buffer.Length)) |
| 37 | + try |
60 | 38 | {
|
61 |
| - // Mark resultString for UTF-8 encoding |
62 |
| - output[0] = '\0'; |
| 39 | + resultString = defaultEncoding.GetString(span); |
| 40 | + } |
| 41 | + catch (DecoderFallbackException) |
| 42 | + { |
| 43 | + throw new InvalidOperationException(); |
63 | 44 | }
|
64 |
| - } |
65 |
| - }; |
66 |
| - |
67 |
| - public static unsafe string GetAsciiStringNonNullCharacters(this ReadOnlySpan<byte> span) |
68 |
| - { |
69 |
| - if (span.IsEmpty) |
70 |
| - { |
71 |
| - return string.Empty; |
72 | 45 | }
|
73 | 46 |
|
74 |
| - fixed (byte* source = &MemoryMarshal.GetReference(span)) |
75 |
| - { |
76 |
| - return string.Create(span.Length, (IntPtr)source, s_getAsciiStringNonNullCharacters); |
77 |
| - } |
| 47 | + return resultString; |
78 | 48 | }
|
79 | 49 |
|
80 |
| - private static readonly unsafe SpanAction<char, IntPtr> s_getAsciiStringNonNullCharacters = (Span<char> buffer, IntPtr state) => |
| 50 | + public static unsafe string GetAsciiStringNonNullCharacters(this ReadOnlySpan<byte> span) |
81 | 51 | {
|
82 |
| - fixed (char* output = &MemoryMarshal.GetReference(buffer)) |
| 52 | + return string.Create(span.Length, (IntPtr)(&span), (destination, spanPtr) => |
83 | 53 | {
|
84 |
| - // This version of AsciiUtilities returns false if there are any null ('\0') or non-Ascii |
85 |
| - // character (> 127) in the string. |
86 |
| - if (!TryGetAsciiString((byte*)state.ToPointer(), output, buffer.Length)) |
| 54 | + if (Ascii.ToUtf16(*(ReadOnlySpan<byte>*)spanPtr, destination, out _) != OperationStatus.Done) |
87 | 55 | {
|
88 | 56 | throw new InvalidOperationException();
|
89 | 57 | }
|
90 |
| - } |
| 58 | + }); |
91 | 59 | };
|
92 | 60 |
|
93 | 61 | public static unsafe string GetLatin1StringNonNullCharacters(this ReadOnlySpan<byte> span)
|
@@ -115,196 +83,6 @@ public static unsafe string GetLatin1StringNonNullCharacters(this ReadOnlySpan<b
|
115 | 83 | }
|
116 | 84 | };
|
117 | 85 |
|
118 |
| - [MethodImpl(MethodImplOptions.AggressiveOptimization)] |
119 |
| - public static unsafe bool TryGetAsciiString(byte* input, char* output, int count) |
120 |
| - { |
121 |
| - Debug.Assert(input != null); |
122 |
| - Debug.Assert(output != null); |
123 |
| - |
124 |
| - var end = input + count; |
125 |
| - |
126 |
| - Debug.Assert((long)end >= Vector256<sbyte>.Count); |
127 |
| - |
128 |
| - // PERF: so the JIT can reuse the zero from a register |
129 |
| - var zero = Vector128<sbyte>.Zero; |
130 |
| - |
131 |
| - if (Sse2.IsSupported) |
132 |
| - { |
133 |
| - if (Avx2.IsSupported && input <= end - Vector256<sbyte>.Count) |
134 |
| - { |
135 |
| - var avxZero = Vector256<sbyte>.Zero; |
136 |
| - |
137 |
| - do |
138 |
| - { |
139 |
| - var vector = Avx.LoadVector256(input).AsSByte(); |
140 |
| - if (!CheckBytesInAsciiRange(vector, avxZero)) |
141 |
| - { |
142 |
| - return false; |
143 |
| - } |
144 |
| - |
145 |
| - var tmp0 = Avx2.UnpackLow(vector, avxZero); |
146 |
| - var tmp1 = Avx2.UnpackHigh(vector, avxZero); |
147 |
| - |
148 |
| - // Bring into the right order |
149 |
| - var out0 = Avx2.Permute2x128(tmp0, tmp1, 0x20); |
150 |
| - var out1 = Avx2.Permute2x128(tmp0, tmp1, 0x31); |
151 |
| - |
152 |
| - Avx.Store((ushort*)output, out0.AsUInt16()); |
153 |
| - Avx.Store((ushort*)output + Vector256<ushort>.Count, out1.AsUInt16()); |
154 |
| - |
155 |
| - input += Vector256<sbyte>.Count; |
156 |
| - output += Vector256<sbyte>.Count; |
157 |
| - } while (input <= end - Vector256<sbyte>.Count); |
158 |
| - |
159 |
| - if (input == end) |
160 |
| - { |
161 |
| - return true; |
162 |
| - } |
163 |
| - } |
164 |
| - |
165 |
| - if (input <= end - Vector128<sbyte>.Count) |
166 |
| - { |
167 |
| - do |
168 |
| - { |
169 |
| - var vector = Sse2.LoadVector128(input).AsSByte(); |
170 |
| - if (!CheckBytesInAsciiRange(vector, zero)) |
171 |
| - { |
172 |
| - return false; |
173 |
| - } |
174 |
| - |
175 |
| - var c0 = Sse2.UnpackLow(vector, zero).AsUInt16(); |
176 |
| - var c1 = Sse2.UnpackHigh(vector, zero).AsUInt16(); |
177 |
| - |
178 |
| - Sse2.Store((ushort*)output, c0); |
179 |
| - Sse2.Store((ushort*)output + Vector128<ushort>.Count, c1); |
180 |
| - |
181 |
| - input += Vector128<sbyte>.Count; |
182 |
| - output += Vector128<sbyte>.Count; |
183 |
| - } while (input <= end - Vector128<sbyte>.Count); |
184 |
| - |
185 |
| - if (input == end) |
186 |
| - { |
187 |
| - return true; |
188 |
| - } |
189 |
| - } |
190 |
| - } |
191 |
| - else if (Vector.IsHardwareAccelerated) |
192 |
| - { |
193 |
| - while (input <= end - Vector<sbyte>.Count) |
194 |
| - { |
195 |
| - var vector = Unsafe.AsRef<Vector<sbyte>>(input); |
196 |
| - if (!CheckBytesInAsciiRange(vector)) |
197 |
| - { |
198 |
| - return false; |
199 |
| - } |
200 |
| - |
201 |
| - Vector.Widen( |
202 |
| - vector, |
203 |
| - out Unsafe.AsRef<Vector<short>>(output), |
204 |
| - out Unsafe.AsRef<Vector<short>>(output + Vector<short>.Count)); |
205 |
| - |
206 |
| - input += Vector<sbyte>.Count; |
207 |
| - output += Vector<sbyte>.Count; |
208 |
| - } |
209 |
| - |
210 |
| - if (input == end) |
211 |
| - { |
212 |
| - return true; |
213 |
| - } |
214 |
| - } |
215 |
| - |
216 |
| - if (Environment.Is64BitProcess) // Use Intrinsic switch for branch elimination |
217 |
| - { |
218 |
| - // 64-bit: Loop longs by default |
219 |
| - while (input <= end - sizeof(long)) |
220 |
| - { |
221 |
| - var value = *(long*)input; |
222 |
| - if (!CheckBytesInAsciiRange(value)) |
223 |
| - { |
224 |
| - return false; |
225 |
| - } |
226 |
| - |
227 |
| - // BMI2 could be used, but this variant is faster on both Intel and AMD. |
228 |
| - if (Sse2.X64.IsSupported) |
229 |
| - { |
230 |
| - var vecNarrow = Sse2.X64.ConvertScalarToVector128Int64(value).AsSByte(); |
231 |
| - var vecWide = Sse2.UnpackLow(vecNarrow, zero).AsUInt64(); |
232 |
| - Sse2.Store((ulong*)output, vecWide); |
233 |
| - } |
234 |
| - else |
235 |
| - { |
236 |
| - output[0] = (char)input[0]; |
237 |
| - output[1] = (char)input[1]; |
238 |
| - output[2] = (char)input[2]; |
239 |
| - output[3] = (char)input[3]; |
240 |
| - output[4] = (char)input[4]; |
241 |
| - output[5] = (char)input[5]; |
242 |
| - output[6] = (char)input[6]; |
243 |
| - output[7] = (char)input[7]; |
244 |
| - } |
245 |
| - |
246 |
| - input += sizeof(long); |
247 |
| - output += sizeof(long); |
248 |
| - } |
249 |
| - |
250 |
| - if (input <= end - sizeof(int)) |
251 |
| - { |
252 |
| - var value = *(int*)input; |
253 |
| - if (!CheckBytesInAsciiRange(value)) |
254 |
| - { |
255 |
| - return false; |
256 |
| - } |
257 |
| - |
258 |
| - WidenFourAsciiBytesToUtf16AndWriteToBuffer(output, input, value, zero); |
259 |
| - |
260 |
| - input += sizeof(int); |
261 |
| - output += sizeof(int); |
262 |
| - } |
263 |
| - } |
264 |
| - else |
265 |
| - { |
266 |
| - // 32-bit: Loop ints by default |
267 |
| - while (input <= end - sizeof(int)) |
268 |
| - { |
269 |
| - var value = *(int*)input; |
270 |
| - if (!CheckBytesInAsciiRange(value)) |
271 |
| - { |
272 |
| - return false; |
273 |
| - } |
274 |
| - |
275 |
| - WidenFourAsciiBytesToUtf16AndWriteToBuffer(output, input, value, zero); |
276 |
| - |
277 |
| - input += sizeof(int); |
278 |
| - output += sizeof(int); |
279 |
| - } |
280 |
| - } |
281 |
| - |
282 |
| - if (input <= end - sizeof(short)) |
283 |
| - { |
284 |
| - if (!CheckBytesInAsciiRange(((short*)input)[0])) |
285 |
| - { |
286 |
| - return false; |
287 |
| - } |
288 |
| - |
289 |
| - output[0] = (char)input[0]; |
290 |
| - output[1] = (char)input[1]; |
291 |
| - |
292 |
| - input += sizeof(short); |
293 |
| - output += sizeof(short); |
294 |
| - } |
295 |
| - |
296 |
| - if (input < end) |
297 |
| - { |
298 |
| - if (!CheckBytesInAsciiRange(((sbyte*)input)[0])) |
299 |
| - { |
300 |
| - return false; |
301 |
| - } |
302 |
| - output[0] = (char)input[0]; |
303 |
| - } |
304 |
| - |
305 |
| - return true; |
306 |
| - } |
307 |
| - |
308 | 86 | [MethodImpl(MethodImplOptions.AggressiveOptimization)]
|
309 | 87 | public static unsafe bool TryGetLatin1String(byte* input, char* output, int count)
|
310 | 88 | {
|
|
0 commit comments