// File: Sources/SparkConnect/CRC32.swift
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

import Foundation

/// Table-driven CRC32 checksum calculator.
///
/// Every entry point starts the register at `0xFFFF_FFFF`, folds the input in
/// one byte at a time through a 256-entry lookup table built from the
/// polynomial `0xEDB8_8320`, and returns the bitwise complement of the final
/// register. Empty input therefore yields `0`.
public struct CRC32 {

  /// Pre-computed CRC32 table
  private static let crcTable: [UInt32] = {
    var table = [UInt32](repeating: 0, count: 256)
    let polynomial: UInt32 = 0xEDB8_8320  // IEEE 802.3 polynomial

    for i in 0..<256 {
      var c = UInt32(i)
      // Eight shift steps per table entry: one for each bit of the index byte.
      for _ in 0..<8 {
        if (c & 1) == 1 {
          c = polynomial ^ (c >> 1)
        } else {
          c = c >> 1
        }
      }
      table[i] = c
    }
    return table
  }()

  /// Folds every byte of `bytes` into a fresh CRC register and finalizes it.
  ///
  /// This is the single implementation shared by all public `checksum` overloads,
  /// so they cannot drift apart.
  ///
  /// - Parameter bytes: The bytes to checksum, in order.
  /// - Returns: The complemented final register value.
  private static func accumulate<S: Sequence>(_ bytes: S) -> UInt32 where S.Element == UInt8 {
    var crc: UInt32 = 0xFFFF_FFFF
    for byte in bytes {
      // The low 8 bits of (crc ^ byte) select the table entry.
      crc = (crc >> 8) ^ crcTable[Int((crc ^ UInt32(byte)) & 0xFF)]
    }
    return ~crc
  }

  /// Calculates the CRC32 checksum for the given Data.
  ///
  /// - Parameter data: The Data object for which to calculate the checksum.
  /// - Returns: The calculated CRC32 checksum as a UInt32.
  public static func checksum(data: Data) -> UInt32 {
    // `UnsafeRawBufferPointer` is already a collection of `UInt8`, so the
    // buffer can be iterated directly without rebinding its memory.
    return data.withUnsafeBytes { (buffer: UnsafeRawBufferPointer) -> UInt32 in
      accumulate(buffer)
    }
  }

  /// Calculates the CRC32 checksum for the given String.
  ///
  /// - Parameter string: The String object for which to calculate the checksum.
  /// - Parameter encoding: The encoding to use when converting the string to Data (defaults to .utf8).
  /// - Returns: The calculated CRC32 checksum as a UInt32. Returns nil if the string cannot be
  ///   converted to Data.
  public static func checksum(string: String, encoding: String.Encoding = .utf8) -> UInt32? {
    guard let data = string.data(using: encoding) else {
      return nil
    }
    return checksum(data: data)
  }

  /// Calculates the CRC32 checksum for the given array of bytes.
  ///
  /// - Parameter bytes: The [UInt8] array for which to calculate the checksum.
  /// - Returns: The calculated CRC32 checksum as a UInt32.
  public static func checksum(bytes: [UInt8]) -> UInt32 {
    return accumulate(bytes)
  }
}
// File: Tests/SparkConnectTests/CRC32Tests.swift
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
//

import Foundation
import SparkConnect
import Testing

/// Tests for the three `CRC32.checksum` overloads (`data:`, `string:`, `bytes:`).
struct CRC32Tests {
  /// Empty input is expected to produce `0` from every overload.
  @Test
  func testChecksumWithEmptyData() async throws {
    #expect(CRC32.checksum(data: Data()) == 0)
    #expect(CRC32.checksum(string: "") == 0)
    #expect(CRC32.checksum(bytes: []) == 0)
  }

  /// A short ASCII string is expected to produce the same checksum from every overload.
  @Test
  func testChecksum() async throws {
    let str = "Apache Spark Connect Client for Swift"
    // `#require` reports a test failure instead of crashing if the conversion fails.
    let data = try #require(str.data(using: .ascii))
    #expect(CRC32.checksum(string: str, encoding: .ascii) == 2_736_908_745)
    #expect(CRC32.checksum(data: data) == 2_736_908_745)
    #expect(CRC32.checksum(bytes: [UInt8](data)) == 2_736_908_745)
  }

  /// The same string repeated 1000 times is expected to agree across every overload.
  @Test
  func testLongChecksum() async throws {
    let str = String(repeating: "Apache Spark Connect Client for Swift", count: 1000)
    // `#require` reports a test failure instead of crashing if the conversion fails.
    let data = try #require(str.data(using: .ascii))
    #expect(CRC32.checksum(string: str, encoding: .ascii) == 1_985_943_888)
    #expect(CRC32.checksum(data: data) == 1_985_943_888)
    #expect(CRC32.checksum(bytes: [UInt8](data)) == 1_985_943_888)
  }
}