Skip to content

Commit 84af599

Browse files
ladeakladeakgfoidl
authored
Using SearchValues in ContentDispositionHeaderValue (#55039)
* Using SearchValues in ContentDispositionHeaderValue Updating the current Encode5987 method to use SearchValues and IndexOfAny to encode the FileNameStar property. Adding unit tests and a benchmark to measure the before/after performance. * Removing the if-else case for the encoding parts with the rune. * Correcting the size of the temp char buffer allocation * Review feedback * Adding a test for inputs that do not fit in the stack-allocated buffer * Using a for loop instead of a while loop for processing toHexEscape * Using ref struct string interpolation with string builder * Using a const for the stack-allocated buffer sizes. * Remove unneeded span * Using Rune to encode/decode non-ASCII characters * With custom UTF-8 and hex encoding combined * Update src/Http/Headers/src/ContentDispositionHeaderValue.cs Co-authored-by: Günther Foidl <gue@korporal.at> --------- Co-authored-by: ladeak <ladeak87@windowslive.com> Co-authored-by: Günther Foidl <gue@korporal.at>
1 parent 8660e3a commit 84af599

File tree

3 files changed

+128
-45
lines changed

3 files changed

+128
-45
lines changed

src/Http/Headers/src/ContentDispositionHeaderValue.cs

Lines changed: 61 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ public class ContentDispositionHeaderValue
3737

3838
// attr-char definition from RFC5987
3939
// Same as token except ( "*" / "'" / "%" )
40-
private static readonly SearchValues<char> AttrChar =
40+
private static readonly SearchValues<char> Rfc5987AttrChar =
4141
SearchValues.Create("!#$&+-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz|~");
4242

4343
private static readonly HttpHeaderParser<ContentDispositionHeaderValue> Parser
@@ -618,54 +618,36 @@ private static bool TryDecodeMime(StringSegment input, [NotNullWhen(true)] out s
618618
private static string Encode5987(StringSegment input)
619619
{
620620
var builder = new StringBuilder("UTF-8\'\'");
621-
622-
var maxInputBytes = Encoding.UTF8.GetMaxByteCount(input.Length);
623-
byte[]? bufferFromPool = null;
624-
Span<byte> inputBytes = maxInputBytes <= MaxStackAllocSizeBytes
625-
? stackalloc byte[MaxStackAllocSizeBytes]
626-
: bufferFromPool = ArrayPool<byte>.Shared.Rent(maxInputBytes);
627-
628-
var bytesWritten = Encoding.UTF8.GetBytes(input, inputBytes);
629-
inputBytes = inputBytes[..bytesWritten];
630-
631-
int totalBytesConsumed = 0;
632-
while (totalBytesConsumed < inputBytes.Length)
621+
var remaining = input.AsSpan();
622+
while (remaining.Length > 0)
633623
{
634-
if (Ascii.IsValid(inputBytes[totalBytesConsumed]))
624+
var length = remaining.IndexOfAnyExcept(Rfc5987AttrChar);
625+
if (length < 0)
635626
{
636-
// This is an ASCII char. Let's handle it ourselves.
637-
638-
char c = (char)inputBytes[totalBytesConsumed];
639-
if (!AttrChar.Contains(c))
640-
{
641-
HexEscape(builder, c);
642-
}
643-
else
644-
{
645-
builder.Append(c);
646-
}
647-
648-
totalBytesConsumed++;
627+
length = remaining.Length;
649628
}
650-
else
651-
{
652-
// Non-ASCII, let's rely on Rune to decode it.
629+
builder.Append(remaining[..length]);
653630

654-
Rune.DecodeFromUtf8(inputBytes.Slice(totalBytesConsumed), out Rune r, out int bytesConsumedForRune);
655-
Contract.Assert(!r.IsAscii, "We shouldn't have gotten here if the Rune is ASCII.");
631+
remaining = remaining.Slice(length);
632+
if (remaining.Length == 0)
633+
{
634+
break;
635+
}
656636

657-
for (int i = 0; i < bytesConsumedForRune; i++)
658-
{
659-
HexEscape(builder, (char)inputBytes[totalBytesConsumed + i]);
660-
}
637+
length = remaining.IndexOfAny(Rfc5987AttrChar);
638+
if (length < 0)
639+
{
640+
length = remaining.Length;
641+
}
661642

662-
totalBytesConsumed += bytesConsumedForRune;
643+
for (var i = 0; i < length;)
644+
{
645+
Rune.DecodeFromUtf16(remaining.Slice(i), out Rune rune, out var runeLength);
646+
EncodeToUtf8Hex(rune, builder);
647+
i += runeLength;
663648
}
664-
}
665649

666-
if (bufferFromPool is not null)
667-
{
668-
ArrayPool<byte>.Shared.Return(bufferFromPool);
650+
remaining = remaining.Slice(length);
669651
}
670652

671653
return builder.ToString();
@@ -675,11 +657,45 @@ private static string Encode5987(StringSegment input)
675657
'0', '1', '2', '3', '4', '5', '6', '7',
676658
'8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
677659

678-
private static void HexEscape(StringBuilder builder, char c)
660+
private static void EncodeToUtf8Hex(Rune rune, StringBuilder builder)
679661
{
680-
builder.Append('%');
681-
builder.Append(HexUpperChars[(c & 0xf0) >> 4]);
682-
builder.Append(HexUpperChars[c & 0xf]);
662+
// Inspired by https://source.dot.net/#System.Private.CoreLib/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs TryEncodeToUtf8
663+
var value = (uint)rune.Value;
664+
if (rune.IsAscii)
665+
{
666+
var byteValue = (byte)value;
667+
builder.Append(CultureInfo.InvariantCulture, $"%{HexUpperChars[(byteValue & 0xf0) >> 4]}{HexUpperChars[byteValue & 0xf]}");
668+
}
669+
else if (rune.Value <= 0x7FFu)
670+
{
671+
// Scalar 00000yyy yyxxxxxx -> bytes [ 110yyyyy 10xxxxxx ]
672+
var byteValue = (byte)((value + (0b110u << 11)) >> 6);
673+
builder.Append(CultureInfo.InvariantCulture, $"%{HexUpperChars[(byteValue & 0xf0) >> 4]}{HexUpperChars[byteValue & 0xf]}");
674+
byteValue = (byte)((value & 0x3Fu) + 0x80u);
675+
builder.Append(CultureInfo.InvariantCulture, $"%{HexUpperChars[(byteValue & 0xf0) >> 4]}{HexUpperChars[byteValue & 0xf]}");
676+
}
677+
else if (rune.Value <= 0xFFFFu)
678+
{
679+
// Scalar zzzzyyyy yyxxxxxx -> bytes [ 1110zzzz 10yyyyyy 10xxxxxx ]
680+
var byteValue = (byte)((value + (0b1110 << 16)) >> 12);
681+
builder.Append(CultureInfo.InvariantCulture, $"%{HexUpperChars[(byteValue & 0xf0) >> 4]}{HexUpperChars[byteValue & 0xf]}");
682+
byteValue = (byte)(((value & (0x3Fu << 6)) >> 6) + 0x80u);
683+
builder.Append(CultureInfo.InvariantCulture, $"%{HexUpperChars[(byteValue & 0xf0) >> 4]}{HexUpperChars[byteValue & 0xf]}");
684+
byteValue = (byte)((value & 0x3Fu) + 0x80u);
685+
builder.Append(CultureInfo.InvariantCulture, $"%{HexUpperChars[(byteValue & 0xf0) >> 4]}{HexUpperChars[byteValue & 0xf]}");
686+
}
687+
else
688+
{
689+
// Scalar 000uuuuu zzzzyyyy yyxxxxxx -> bytes [ 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx ]
690+
var byteValue = (byte)((value + (0b11110 << 21)) >> 18);
691+
builder.Append(CultureInfo.InvariantCulture, $"%{HexUpperChars[(byteValue & 0xf0) >> 4]}{HexUpperChars[byteValue & 0xf]}");
692+
byteValue = (byte)(((value & (0x3Fu << 12)) >> 12) + 0x80u);
693+
builder.Append(CultureInfo.InvariantCulture, $"%{HexUpperChars[(byteValue & 0xf0) >> 4]}{HexUpperChars[byteValue & 0xf]}");
694+
byteValue = (byte)(((value & (0x3Fu << 6)) >> 6) + 0x80u);
695+
builder.Append(CultureInfo.InvariantCulture, $"%{HexUpperChars[(byteValue & 0xf0) >> 4]}{HexUpperChars[byteValue & 0xf]}");
696+
byteValue = (byte)((value & 0x3Fu) + 0x80u);
697+
builder.Append(CultureInfo.InvariantCulture, $"%{HexUpperChars[(byteValue & 0xf0) >> 4]}{HexUpperChars[byteValue & 0xf]}");
698+
}
683699
}
684700

685701
// Attempt to decode using RFC 5987 encoding.

src/Http/Headers/test/ContentDispositionHeaderValueTest.cs

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33

44
using System.Globalization;
5+
using System.Text;
56

67
namespace Microsoft.Net.Http.Headers;
78

@@ -212,6 +213,53 @@ public void FileNameStar_NeedsEncoding_EncodedAndDecodedCorrectly()
212213
Assert.Null(contentDisposition.FileNameStar.Value);
213214
}
214215

216+
[Fact]
217+
public void NonValidAscii_WhenNeedsEncoding_UsesHex()
218+
{
219+
var contentDisposition = new ContentDispositionHeaderValue("inline");
220+
contentDisposition.FileNameStar = "a\u0080b";
221+
Assert.Equal($"UTF-8\'\'a%C2%80b", contentDisposition.Parameters.First().Value); // %C2 appears because U+0080 is encoded as two bytes (C2 80) in UTF-8.
222+
}
223+
224+
[Fact]
225+
public void LongValidAscii_FullyProcessedWithout()
226+
{
227+
var contentDisposition = new ContentDispositionHeaderValue("inline");
228+
contentDisposition.FileNameStar = new string('a', 400); // 400 is larger than the max stackalloc size
229+
Assert.Equal($"UTF-8\'\'{new string('a', 400)}", contentDisposition.Parameters.First().Value);
230+
}
231+
232+
[Fact]
233+
public void FileNameStar_WhenNeedsEncoding_UsesHex()
234+
{
235+
var contentDisposition = new ContentDispositionHeaderValue("inline");
236+
foreach (byte b in Enumerable.Range(0, 128))
237+
{
238+
contentDisposition.FileNameStar = $"a{(char)b}b";
239+
if (b <= 0x20
240+
|| b == '"'
241+
|| b == '%'
242+
|| (b >= 0x27 && b <= 0x2A)
243+
|| b == ','
244+
|| b == '/'
245+
|| (b >= 0x3A && b <= 0x40)
246+
|| (b >= 0x5B && b <= 0x5D)
247+
|| (b >= 0x61 && b <= 0x5D)
248+
|| b == '{'
249+
|| b == '}'
250+
|| b >= 0x7F)
251+
{
252+
var hexC = Convert.ToHexString([b]);
253+
Assert.Equal($"UTF-8\'\'a%{hexC}b", contentDisposition.Parameters.First().Value);
254+
}
255+
else
256+
{
257+
Assert.Equal($"UTF-8\'\'a{(char)b}b", contentDisposition.Parameters.First().Value);
258+
}
259+
contentDisposition.Parameters.Remove(contentDisposition.Parameters.First());
260+
}
261+
}
262+
215263
[Fact]
216264
public void FileNameStar_UnknownOrBadEncoding_PropertyFails()
217265
{
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using BenchmarkDotNet.Attributes;
5+
using Microsoft.Net.Http.Headers;
6+
7+
namespace Microsoft.AspNetCore.Http;
8+
9+
public class ContentDispositionHeaderValueBenchmarks
10+
{
11+
private readonly ContentDispositionHeaderValue _contentDisposition = new ContentDispositionHeaderValue("inline");
12+
13+
[Benchmark]
14+
public void FileNameStarEncoding() => _contentDisposition.FileNameStar = "My TypicalFilename 2024 04 09 08:00:00.dat";
15+
16+
[Benchmark]
17+
public void FileNameStarNoEncoding() => _contentDisposition.FileNameStar = "My_TypicalFilename_2024_04_09-08_00_00.dat";
18+
19+
}

0 commit comments

Comments
 (0)