diff --git a/Sources/RawStructuredFieldValues/ASCII.swift b/Sources/RawStructuredFieldValues/ASCII.swift
index 6a56010..a411a72 100644
--- a/Sources/RawStructuredFieldValues/ASCII.swift
+++ b/Sources/RawStructuredFieldValues/ASCII.swift
@@ -45,8 +45,12 @@ let asciiSlash = UInt8(ascii: "/")
 let asciiPeriod = UInt8(ascii: ".")
 let asciiComma = UInt8(ascii: ",")
 let asciiCapitalA = UInt8(ascii: "A")
+let asciiCapitalF = UInt8(ascii: "F")
 let asciiCapitalZ = UInt8(ascii: "Z")
 let asciiLowerA = UInt8(ascii: "a")
+let asciiLowerF = UInt8(ascii: "f")
 let asciiLowerZ = UInt8(ascii: "z")
 let asciiCapitals = asciiCapitalA...asciiCapitalZ
 let asciiLowercases = asciiLowerA...asciiLowerZ
+let asciiHexCapitals = asciiCapitalA...asciiCapitalF
+let asciiHexLowercases = asciiLowerA...asciiLowerF
diff --git a/Sources/RawStructuredFieldValues/ComponentTypes.swift b/Sources/RawStructuredFieldValues/ComponentTypes.swift
index 2b7ebb5..7fdae33 100644
--- a/Sources/RawStructuredFieldValues/ComponentTypes.swift
+++ b/Sources/RawStructuredFieldValues/ComponentTypes.swift
@@ -110,6 +110,8 @@ extension BareItem {
 
         case .date:
             throw StructuredHeaderError.invalidItem
+        case .displayString:
+            throw StructuredHeaderError.invalidItem
         }
     }
 }
@@ -141,6 +143,9 @@ public enum RFC9651BareItem: Sendable {
 
     /// A date item.
     case date(Int)
+
+    /// A display string item.
+    case displayString(String)
 }
 
 extension RFC9651BareItem: ExpressibleByBooleanLiteral {
diff --git a/Sources/RawStructuredFieldValues/Errors.swift b/Sources/RawStructuredFieldValues/Errors.swift
index acb3b2a..1a5a3d5 100644
--- a/Sources/RawStructuredFieldValues/Errors.swift
+++ b/Sources/RawStructuredFieldValues/Errors.swift
@@ -27,6 +27,7 @@ public struct StructuredHeaderError: Error, Sendable {
         case invalidBoolean
         case invalidToken
         case invalidDate
+        case invalidDisplayString
         case invalidList
         case invalidDictionary
         case missingKey
@@ -53,6 +54,7 @@ extension StructuredHeaderError {
     public static let invalidBoolean = StructuredHeaderError(.invalidBoolean)
     public static let invalidToken = StructuredHeaderError(.invalidToken)
     public static let invalidDate = StructuredHeaderError(.invalidDate)
+    public static let invalidDisplayString = StructuredHeaderError(.invalidDisplayString)
     public static let invalidList = StructuredHeaderError(.invalidList)
     public static let invalidDictionary = StructuredHeaderError(.invalidDictionary)
     public static let missingKey = StructuredHeaderError(.missingKey)
diff --git a/Sources/RawStructuredFieldValues/FieldParser.swift b/Sources/RawStructuredFieldValues/FieldParser.swift
index e2467b8..99549e1 100644
--- a/Sources/RawStructuredFieldValues/FieldParser.swift
+++ b/Sources/RawStructuredFieldValues/FieldParser.swift
@@ -224,6 +224,8 @@ extension StructuredFieldValueParser {
             return try self._parseAToken()
         case asciiAt:
             return try self._parseADate()
+        case asciiPercent:
+            return try self._parseADisplayString()
         default:
             throw StructuredHeaderError.invalidItem
         }
@@ -491,6 +493,92 @@ extension StructuredFieldValueParser {
         return try self._parseAnIntegerOrDecimal(isDate: true)
     }
 
+    /// Parses a Display String (`%"..."`) from the front of `underlyingData`.
+    ///
+    /// Percent-encoded octets must be written with lowercase hex digits, and the decoded
+    /// octets must form valid UTF-8.
+    ///
+    /// - Throws: `StructuredHeaderError.invalidDisplayString` for any malformed input,
+    ///   including a missing closing DQUOTE.
+    private mutating func _parseADisplayString() throws -> RFC9651BareItem {
+        assert(self.underlyingData.first == asciiPercent)
+        self.underlyingData.consumeFirst()
+
+        guard self.underlyingData.first == asciiDquote else {
+            throw StructuredHeaderError.invalidDisplayString
+        }
+
+        self.underlyingData.consumeFirst()
+
+        var byteArray = [UInt8]()
+
+        while let char = self.underlyingData.first {
+            self.underlyingData.consumeFirst()
+
+            switch char {
+            case 0x00...0x1F, 0x7F...:
+                // Control characters and non-ASCII bytes may only appear percent-encoded.
+                throw StructuredHeaderError.invalidDisplayString
+            case asciiPercent:
+                // A percent sign must be followed by exactly two hex digits.
+                if self.underlyingData.count < 2 {
+                    throw StructuredHeaderError.invalidDisplayString
+                }
+
+                let octetHex = EncodedHex(self.underlyingData.prefix(2))
+
+                self.underlyingData = self.underlyingData.dropFirst(2)
+
+                guard let octet = octetHex.decode() else {
+                    throw StructuredHeaderError.invalidDisplayString
+                }
+
+                byteArray.append(octet)
+            case asciiDquote:
+                #if compiler(>=6.0)
+                if #available(macOS 15.0, iOS 18.0, tvOS 18.0, watchOS 11.0, *) {
+                    let unicodeSequence = String(validating: byteArray, as: UTF8.self)
+
+                    guard let unicodeSequence else {
+                        throw StructuredHeaderError.invalidDisplayString
+                    }
+
+                    return .displayString(unicodeSequence)
+                } else {
+                    return try self._decodeDisplayString(byteArray: &byteArray)
+                }
+                #else
+                return try self._decodeDisplayString(byteArray: &byteArray)
+                #endif
+            default:
+                byteArray.append(char)
+            }
+        }
+
+        // Fail parsing — reached the end of the string without finding a closing DQUOTE.
+        throw StructuredHeaderError.invalidDisplayString
+    }
+
+    /// Validates `byteArray` as UTF-8 and wraps the decoded text in a display string item.
+    ///
+    /// This is the fallback for environments where `String(validating:as:)` is unavailable.
+    /// `String(validatingUTF8:)` is deliberately not used here: it reads a NUL-terminated C
+    /// string, so a percent-decoded `%00` octet would silently truncate the result. Decoding
+    /// with repair and comparing the round trip accepts exactly the well-formed inputs, because
+    /// every ill-formed sequence is replaced by U+FFFD and therefore changes the bytes.
+    ///
+    /// - Parameter byteArray: The percent-decoded octets of the display string; not modified.
+    /// - Throws: `StructuredHeaderError.invalidDisplayString` if the octets are not valid UTF-8.
+    private func _decodeDisplayString(byteArray: inout [UInt8]) throws -> RFC9651BareItem {
+        let unicodeSequence = String(decoding: byteArray, as: UTF8.self)
+
+        guard unicodeSequence.utf8.elementsEqual(byteArray) else {
+            throw StructuredHeaderError.invalidDisplayString
+        }
+
+        return .displayString(unicodeSequence)
+    }
+
     private mutating func _parseParameters() throws -> OrderedMap {
         var parameters = OrderedMap()
 
@@ -643,3 +731,39 @@ extension StrippingStringEscapesCollection.Index: Comparable {
         lhs._baseIndex < rhs._baseIndex
     }
 }
+
+/// `EncodedHex` holds two ASCII bytes that are meant to be (but may not be) a pair of hex digits.
+struct EncodedHex {
+    private(set) var firstChar: UInt8
+    private(set) var secondChar: UInt8
+
+    init<Bytes: Collection>(_ bytes: Bytes) where Bytes.Element == UInt8 {
+        precondition(bytes.count == 2)
+        self.firstChar = bytes[bytes.startIndex]
+        self.secondChar = bytes[bytes.index(after: bytes.startIndex)]
+    }
+
+    /// Validates the two characters and converts them to the octet they encode.
+    ///
+    /// If either character is not a digit or a lowercase `a`-`f`, the result of this method is nil.
+    fileprivate func decode() -> UInt8? {
+        guard
+            let firstCharAsInteger = self.htoi(self.firstChar),
+            let secondCharAsInteger = self.htoi(self.secondChar)
+        else { return nil }
+
+        return (firstCharAsInteger << 4) + secondCharAsInteger
+    }
+
+    /// Converts a single lowercase hex digit given as an ASCII byte to its integer value (0-15).
+    private func htoi(_ asciiChar: UInt8) -> UInt8? {
+        switch asciiChar {
+        case asciiZero...asciiNine:
+            return asciiChar - asciiZero
+        case asciiHexLowercases:
+            return asciiChar - asciiLowerA + 10
+        default:
+            return nil
+        }
+    }
+}
diff --git a/Sources/RawStructuredFieldValues/FieldSerializer.swift b/Sources/RawStructuredFieldValues/FieldSerializer.swift
index 964ecdb..9803861 100644
--- a/Sources/RawStructuredFieldValues/FieldSerializer.swift
+++ b/Sources/RawStructuredFieldValues/FieldSerializer.swift
@@ -213,6 +213,29 @@ extension StructuredFieldValueSerializer {
                 }
 
                 self.data.append(contentsOf: String(date, radix: 10).utf8)
+            case .displayString(let displayString):
+                let bytes = displayString.utf8
+
+                self.data.append(asciiPercent)
+                self.data.append(asciiDquote)
+
+                for byte in bytes {
+                    if byte == asciiPercent
+                        || byte == asciiDquote
+                        || (0x00...0x1F).contains(byte)
+                        || (0x7F...).contains(byte)
+                    {
+                        self.data.append(asciiPercent)
+
+                        let encodedByte = UInt8.encodeToHex(byte)
+                        self.data.append(encodedByte.firstChar)
+                        self.data.append(encodedByte.secondChar)
+                    } else {
+                        self.data.append(byte)
+                    }
+                }
+
+                self.data.append(asciiDquote)
             }
         }
     }
@@ -245,3 +268,18 @@ extension String {
         }
     }
 }
+
+extension UInt8 {
+    /// Encodes a byte as its two lowercase hex digits, wrapped in an `EncodedHex`.
+    fileprivate static func encodeToHex(_ int: Self) -> EncodedHex {
+        let firstChar = self.itoh(int >> 4)
+        let secondChar = self.itoh(int & 0x0F)
+
+        return EncodedHex([firstChar, secondChar])
+    }
+
+    /// Converts a value in 0...15 to its lowercase hex digit as an ASCII byte.
+    private static func itoh(_ int: Self) -> Self {
+        (int > 9) ? (asciiLowerA + int - 10) : (asciiZero + int)
+    }
+}
diff --git a/Sources/sh-parser/main.swift b/Sources/sh-parser/main.swift
index 75deb27..54a1366 100644
--- a/Sources/sh-parser/main.swift
+++ b/Sources/sh-parser/main.swift
@@ -171,6 +171,8 @@ extension RFC9651BareItem {
             return "decimal \(d)"
         case .date(let date):
             return "date \(date)"
+        case .displayString(let displayString):
+            return "display string \(displayString)"
         }
     }
 }
diff --git a/Tests/StructuredFieldValuesTests/StructuredFieldParserTests.swift b/Tests/StructuredFieldValuesTests/StructuredFieldParserTests.swift
index 5f40a86..c8ec7ac 100644
--- a/Tests/StructuredFieldValuesTests/StructuredFieldParserTests.swift
+++ b/Tests/StructuredFieldValuesTests/StructuredFieldParserTests.swift
@@ -87,6 +87,24 @@ final class StructuredFieldParserTests: XCTestCase {
 
             XCTAssertEqual(typeName, "date", "\(fixtureName): Expected type date, got type \(typeName)")
             XCTAssertEqual(typeValue, baseDate, "\(fixtureName): Got \(baseDate), expected \(typeValue)")
+        case (.displayString(let baseDisplayString), .dictionary(let typeDictionary)):
+            guard typeDictionary.count == 2, case .string(let typeName) = typeDictionary["__type"],
+                case .string(let typeValue) = typeDictionary["value"]
+            else {
+                XCTFail("\(fixtureName): Unexpected type dict \(typeDictionary)")
+                return
+            }
+
+            XCTAssertEqual(
+                typeName,
+                "displaystring",
+                "\(fixtureName): Expected type displaystring, got type \(typeName)"
+            )
+            XCTAssertEqual(
+                typeValue,
+                baseDisplayString,
+                "\(fixtureName): Got \(baseDisplayString), expected \(typeValue)"
+            )
         default:
             XCTFail("\(fixtureName): Got \(bareItem), expected \(schema)")
         }
diff --git a/Tests/StructuredFieldValuesTests/StructuredFieldSerializerTests.swift b/Tests/StructuredFieldValuesTests/StructuredFieldSerializerTests.swift
index 768a9f8..10f8482 100644
--- a/Tests/StructuredFieldValuesTests/StructuredFieldSerializerTests.swift
+++ b/Tests/StructuredFieldValuesTests/StructuredFieldSerializerTests.swift
@@ -214,6 +214,9 @@ extension RFC9651BareItem {
         case (.some(.string("date")), .some(.integer(let value))):
             self = .date(value)
 
+        case (.some(.string("displaystring")), .some(.string(let value))):
+            self = .displayString(value)
+
         default:
             preconditionFailure("Unexpected type object \(typeObject)")
         }
diff --git a/Tests/TestFixtures/display-string.json b/Tests/TestFixtures/display-string.json
new file mode 100644
index 0000000..351f15b
--- /dev/null
+++ b/Tests/TestFixtures/display-string.json
@@ -0,0 +1,111 @@
+[
+    {
+        "name": "basic display string (ascii content)",
+        "raw": ["%\"foo bar\""],
+        "header_type": "item",
+        "expected": [{"__type": "displaystring", "value": "foo bar"}, {}]
+    },
+    {
+        "name": "all printable ascii",
+        "raw": ["%\" !%22#$%25&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\""],
+        "header_type": "item",
+        "expected": [{"__type": "displaystring", "value": " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"}, {}]
+    },
+    {
+        "name": "non-ascii display string (uppercase escaping)",
+        "raw": ["%\"f%C3%BC%C3%BC\""],
+        "canonical": ["%\"f%c3%bc%c3%bc\""],
+        "header_type": "item",
+        "must_fail": true
+    },
+    {
+        "name": "non-ascii display string (lowercase escaping)",
+        "raw": ["%\"f%c3%bc%c3%bc\""],
+        "header_type": "item",
+        "expected": [{"__type": "displaystring", "value": "füü"}, {}]
+    },
+    {
+        "name": "tab in display string",
+        "raw": ["%\"\t\""],
+        "header_type": "item",
+        "must_fail": true
+    },
+    {
+        "name": "newline in display string",
+        "raw": ["%\"\n\""],
+        "header_type": "item",
+        "must_fail": true
+    },
+    {
+        "name": "single quoted display string",
+        "raw": ["%'foo'"],
+        "header_type": "item",
+        "must_fail": true
+    },
+    {
+        "name": "unquoted display string",
+        "raw": ["%foo"],
+        "header_type": "item",
+        "must_fail": true
+    },
+    {
+        "name": "display string missing initial quote",
+        "raw": ["%foo\""],
+        "header_type": "item",
+        "must_fail": true
+    },
+    {
+        "name": "unbalanced display string",
+        "raw": ["%\"foo"],
+        "header_type": "item",
+        "must_fail": true
+    },
+    {
+        "name": "display string quoting",
+        "raw": ["%\"foo %22bar%22 \\ baz\""],
+        "header_type": "item",
+        "expected": [{"__type": "displaystring", "value": "foo \"bar\" \\ baz"}, {}]
+    },
+    {
+        "name": "bad display string escaping",
+        "raw": ["%\"foo %a"],
+        "header_type": "item",
+        "must_fail": true
+    },
+    {
+        "name": "bad display string utf-8 (invalid 2-byte seq)",
+        "raw": ["%\"%c3%28\""],
+        "header_type": "item",
+        "must_fail": true
+    },
+    {
+        "name": "bad display string utf-8 (invalid sequence id)",
+        "raw": ["%\"%a0%a1\""],
+        "header_type": "item",
+        "must_fail": true
+    },
+    {
+        "name": "bad display string utf-8 (invalid hex)",
+        "raw": ["%\"%g0%1w\""],
+        "header_type": "item",
+        "must_fail": true
+    },
+    {
+        "name": "bad display string utf-8 (invalid 3-byte seq)",
+        "raw": ["%\"%e2%28%a1\""],
+        "header_type": "item",
+        "must_fail": true
+    },
+    {
+        "name": "bad display string utf-8 (invalid 4-byte seq)",
+        "raw": ["%\"%f0%28%8c%28\""],
+        "header_type": "item",
+        "must_fail": true
+    },
+    {
+        "name": "BOM in display string",
+        "raw": ["%\"BOM: %ef%bb%bf\""],
+        "header_type": "item",
+        "expected": [{"__type": "displaystring", "value": "BOM: \uFEFF"}, {}]
+    }
+]