-
Notifications
You must be signed in to change notification settings - Fork 32
[RFC 9651] Add support for Display String type to RawStructuredFieldValues #43
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
f4dd60f
31da3be
1094d86
4f8243f
f618c1c
08d05e6
91a0ce5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -224,6 +224,8 @@ extension StructuredFieldValueParser { | |
return try self._parseAToken() | ||
case asciiAt: | ||
return try self._parseADate() | ||
case asciiPercent: | ||
return try self._parseADisplayString() | ||
default: | ||
throw StructuredHeaderError.invalidItem | ||
} | ||
|
@@ -491,6 +493,68 @@ extension StructuredFieldValueParser { | |
return try self._parseAnIntegerOrDecimal(isDate: true) | ||
} | ||
|
||
private mutating func _parseADisplayString() throws -> RFC9651BareItem { | ||
assert(self.underlyingData.first == asciiPercent) | ||
self.underlyingData.consumeFirst() | ||
|
||
guard self.underlyingData.first == asciiDquote else { | ||
throw StructuredHeaderError.invalidDisplayString | ||
} | ||
|
||
self.underlyingData.consumeFirst() | ||
|
||
var byteArray = [UInt8]() | ||
|
||
while let char = self.underlyingData.first { | ||
self.underlyingData.consumeFirst() | ||
|
||
switch char { | ||
case 0x00...0x1F, 0x7F...: | ||
throw StructuredHeaderError.invalidDisplayString | ||
case asciiPercent: | ||
if self.underlyingData.count < 2 { | ||
throw StructuredHeaderError.invalidDisplayString | ||
} | ||
|
||
let startIndex = self.underlyingData.startIndex | ||
let secondIndex = self.underlyingData.index(after: startIndex) | ||
let octetHex = self.underlyingData[...secondIndex] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can skip the index math and do |
||
|
||
self.underlyingData = self.underlyingData.dropFirst(2) | ||
|
||
guard | ||
octetHex.allSatisfy({ asciiDigits.contains($0) || asciiLowercases.contains($0) }), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This would be somewhere I'd write a little micro-extension method for this purpose, as we know that there are exactly two values and so we don't strictly need a loop. |
||
let octet = UInt8.decodeHex(octetHex) | ||
else { | ||
throw StructuredHeaderError.invalidDisplayString | ||
} | ||
|
||
byteArray.append(octet) | ||
case asciiDquote: | ||
let unicodeSequence = try byteArray.withUnsafeBytes { | ||
try $0.withMemoryRebound(to: CChar.self) { | ||
guard let baseAddress = $0.baseAddress else { | ||
throw StructuredHeaderError.invalidDisplayString | ||
} | ||
|
||
return String(validatingUTF8: baseAddress) | ||
} | ||
} | ||
|
||
guard let unicodeSequence else { | ||
throw StructuredHeaderError.invalidDisplayString | ||
} | ||
|
||
return .displayString(unicodeSequence) | ||
default: | ||
byteArray.append(char) | ||
} | ||
} | ||
|
||
// Fail parsing — reached the end of the string without finding a closing DQUOTE. | ||
throw StructuredHeaderError.invalidDisplayString | ||
} | ||
|
||
private mutating func _parseParameters() throws -> OrderedMap<Key, RFC9651BareItem> { | ||
var parameters = OrderedMap<Key, RFC9651BareItem>() | ||
|
||
|
@@ -643,3 +707,36 @@ extension StrippingStringEscapesCollection.Index: Comparable { | |
lhs._baseIndex < rhs._baseIndex | ||
} | ||
} | ||
|
||
extension UInt8 { | ||
/// Converts a hex value given in UTF8 to base 10. | ||
fileprivate static func decodeHex<Bytes: RandomAccessCollection>(_ bytes: Bytes) -> Self? | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is also excessively generic I think. We only ever accept a single data type, and it's always two bytes long. It might actually be useful to express that as its own type, e.g:
That lets us express the rest of this math in a much less branchy way, and with an avoidance of preconditions elsewhere. |
||
where Bytes.Element == Self { | ||
var result = Self(0) | ||
var power = Self(bytes.count) | ||
|
||
for byte in bytes { | ||
power -= 1 | ||
|
||
guard let integer = Self.htoi(byte) else { return nil } | ||
result += integer << (power * 4) | ||
} | ||
|
||
return result | ||
} | ||
|
||
/// Converts a hex character given in UTF8 to its integer value. | ||
private static func htoi(_ value: Self) -> Self? { | ||
let charA = Self(UnicodeScalar("a").value) | ||
let char0 = Self(UnicodeScalar("0").value) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Prefer the |
||
|
||
switch value { | ||
case char0...char0 + 9: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These are a touch nicer using named values at both ends of the range. |
||
return value - char0 | ||
case charA...charA + 5: | ||
return value - charA + 10 | ||
default: | ||
return nil | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -213,6 +213,26 @@ extension StructuredFieldValueSerializer { | |
} | ||
|
||
self.data.append(contentsOf: String(date, radix: 10).utf8) | ||
case .displayString(let displayString): | ||
let bytes = displayString.utf8 | ||
|
||
self.data.append(asciiPercent) | ||
self.data.append(asciiDquote) | ||
|
||
for byte in bytes { | ||
if byte == asciiPercent | ||
|| byte == asciiDquote | ||
|| (0x00...0x1F).contains(byte) | ||
|| (0x7F...).contains(byte) | ||
{ | ||
self.data.append(asciiPercent) | ||
self.data.append(contentsOf: String(byte, radix: 16, uppercase: false).utf8) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we can avoid constructing a String here, if we write the code out. |
||
} else { | ||
self.data.append(byte) | ||
} | ||
} | ||
|
||
self.data.append(asciiDquote) | ||
} | ||
} | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
[ | ||
{ | ||
"name": "basic display string (ascii content)", | ||
"raw": ["%\"foo bar\""], | ||
"header_type": "item", | ||
"expected": [{"__type": "displaystring", "value": "foo bar"}, {}] | ||
}, | ||
{ | ||
"name": "all printable ascii", | ||
"raw": ["%\" !%22#$%25&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\""], | ||
"header_type": "item", | ||
"expected": [{"__type": "displaystring", "value": " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"}, {}] | ||
}, | ||
{ | ||
"name": "non-ascii display string (uppercase escaping)", | ||
"raw": ["%\"f%C3%BC%C3%BC\""], | ||
"canonical": ["%\"f%c3%bc%c3%bc\""], | ||
"header_type": "item", | ||
"must_fail": true | ||
}, | ||
{ | ||
"name": "non-ascii display string (lowercase escaping)", | ||
"raw": ["%\"f%c3%bc%c3%bc\""], | ||
"header_type": "item", | ||
"expected": [{"__type": "displaystring", "value": "füü"}, {}] | ||
}, | ||
{ | ||
"name": "tab in display string", | ||
"raw": ["%\"\t\""], | ||
"header_type": "item", | ||
"must_fail": true | ||
}, | ||
{ | ||
"name": "newline in display string", | ||
"raw": ["%\"\n\""], | ||
"header_type": "item", | ||
"must_fail": true | ||
}, | ||
{ | ||
"name": "single quoted display string", | ||
"raw": ["%'foo'"], | ||
"header_type": "item", | ||
"must_fail": true | ||
}, | ||
{ | ||
"name": "unquoted display string", | ||
"raw": ["%foo"], | ||
"header_type": "item", | ||
"must_fail": true | ||
}, | ||
{ | ||
"name": "display string missing initial quote", | ||
"raw": ["%foo\""], | ||
"header_type": "item", | ||
"must_fail": true | ||
}, | ||
{ | ||
"name": "unbalanced display string", | ||
"raw": ["%\"foo"], | ||
"header_type": "item", | ||
"must_fail": true | ||
}, | ||
{ | ||
"name": "display string quoting", | ||
"raw": ["%\"foo %22bar%22 \\ baz\""], | ||
"header_type": "item", | ||
"expected": [{"__type": "displaystring", "value": "foo \"bar\" \\ baz"}, {}] | ||
}, | ||
{ | ||
"name": "bad display string escaping", | ||
"raw": ["%\"foo %a"], | ||
"header_type": "item", | ||
"must_fail": true | ||
}, | ||
{ | ||
"name": "bad display string utf-8 (invalid 2-byte seq)", | ||
"raw": ["%\"%c3%28\""], | ||
"header_type": "item", | ||
"must_fail": true | ||
}, | ||
{ | ||
"name": "bad display string utf-8 (invalid sequence id)", | ||
"raw": ["%\"%a0%a1\""], | ||
"header_type": "item", | ||
"must_fail": true | ||
}, | ||
{ | ||
"name": "bad display string utf-8 (invalid hex)", | ||
"raw": ["%\"%g0%1w\""], | ||
"header_type": "item", | ||
"must_fail": true | ||
}, | ||
{ | ||
"name": "bad display string utf-8 (invalid 3-byte seq)", | ||
"raw": ["%\"%e2%28%a1\""], | ||
"header_type": "item", | ||
"must_fail": true | ||
}, | ||
{ | ||
"name": "bad display string utf-8 (invalid 4-byte seq)", | ||
"raw": ["%\"%f0%28%8c%28\""], | ||
"header_type": "item", | ||
"must_fail": true | ||
}, | ||
{ | ||
"name": "BOM in display string", | ||
"raw": ["%\"BOM: %ef%bb%bf\""], | ||
"header_type": "item", | ||
"expected": [{"__type": "displaystring", "value": "BOM: \uFEFF"}, {}] | ||
} | ||
] |
Uh oh!
There was an error while loading. Please reload this page.