@@ -37,7 +37,7 @@ public class ContentDispositionHeaderValue
37
37
38
38
// attr-char definition from RFC 5987: the characters that may appear unescaped
// in an encoded value. Same as token except ( "*" / "'" / "%" ).
private static readonly SearchValues<char> Rfc5987AttrChar =
    SearchValues.Create("!#$&+-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz|~");
42
42
43
43
private static readonly HttpHeaderParser < ContentDispositionHeaderValue > Parser
@@ -618,54 +618,36 @@ private static bool TryDecodeMime(StringSegment input, [NotNullWhen(true)] out s
618
618
/// <summary>
/// Encodes <paramref name="input"/> in the RFC 5987 extended form: the prefix
/// <c>UTF-8''</c> (charset, empty language) followed by the value, where every
/// character outside <see cref="Rfc5987AttrChar"/> is emitted as percent-encoded UTF-8.
/// </summary>
/// <param name="input">The text to encode.</param>
/// <returns>The encoded string, always starting with <c>UTF-8''</c>.</returns>
private static string Encode5987(StringSegment input)
{
    var builder = new StringBuilder("UTF-8\'\'");

    var remaining = input.AsSpan();
    while (!remaining.IsEmpty)
    {
        // Copy the leading run of attr-chars verbatim.
        var literalLength = remaining.IndexOfAnyExcept(Rfc5987AttrChar);
        if (literalLength < 0)
        {
            // Everything left is safe to emit as-is.
            builder.Append(remaining);
            break;
        }

        builder.Append(remaining[..literalLength]);
        remaining = remaining[literalLength..];

        // The run up to the next attr-char (or the end of input) must be escaped.
        var escapeLength = remaining.IndexOfAny(Rfc5987AttrChar);
        if (escapeLength < 0)
        {
            escapeLength = remaining.Length;
        }

        // Decode rune by rune so surrogate pairs become a single 4-byte UTF-8 sequence.
        // Surrogates are never attr-chars, so a pair cannot straddle the end of this run;
        // an unpaired surrogate decodes to U+FFFD and consumes one char.
        var toEscape = remaining[..escapeLength];
        while (!toEscape.IsEmpty)
        {
            Rune.DecodeFromUtf16(toEscape, out Rune rune, out var charsConsumed);
            EncodeToUtf8Hex(rune, builder);
            toEscape = toEscape[charsConsumed..];
        }

        remaining = remaining[escapeLength..];
    }

    return builder.ToString();
}
@@ -675,11 +657,45 @@ private static string Encode5987(StringSegment input)
675
657
'0' , '1' , '2' , '3' , '4' , '5' , '6' , '7' ,
676
658
'8' , '9' , 'A' , 'B' , 'C' , 'D' , 'E' , 'F' } ;
677
659
678
/// <summary>
/// Appends <paramref name="rune"/> to <paramref name="builder"/> as percent-encoded UTF-8:
/// one <c>%XX</c> triplet with uppercase hex digits for each byte of the rune's UTF-8 form.
/// </summary>
/// <param name="rune">The Unicode scalar value to encode.</param>
/// <param name="builder">The builder that receives the escaped output.</param>
private static void EncodeToUtf8Hex(Rune rune, StringBuilder builder)
{
    // A Unicode scalar value never needs more than four UTF-8 bytes.
    Span<byte> utf8 = stackalloc byte[4];
    var byteCount = rune.EncodeToUtf8(utf8);

    foreach (var b in utf8[..byteCount])
    {
        builder.Append('%');
        builder.Append(HexUpperChars[b >> 4]);
        builder.Append(HexUpperChars[b & 0xF]);
    }
}
684
700
685
701
// Attempt to decode using RFC 5987 encoding.
0 commit comments