From e75e77b22b8220e77196c1379d5f031b36611ec1 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 17 Jun 2025 23:16:47 -0700 Subject: [PATCH] [SPARK-52524] Support `Timestamp` type --- Sources/SparkConnect/DataFrame.swift | 16 ++++++++++++++++ Sources/SparkConnect/Row.swift | 2 ++ Tests/SparkConnectTests/DataFrameTests.swift | 13 +++++++++++++ 3 files changed, 31 insertions(+) diff --git a/Sources/SparkConnect/DataFrame.swift b/Sources/SparkConnect/DataFrame.swift index 2f590de..bbd98bd 100644 --- a/Sources/SparkConnect/DataFrame.swift +++ b/Sources/SparkConnect/DataFrame.swift @@ -429,6 +429,22 @@ public actor DataFrame: Sendable { values.append(array.asAny(i) as? Decimal) case .primitiveInfo(.date32): values.append(array.asAny(i) as! Date) + case .timeInfo(.timestamp): + let timestampType = column.data.type as! ArrowTypeTimestamp + assert(timestampType.timezone == "Etc/UTC") + let timestamp = array.asAny(i) as! Int64 + let timeInterval = + switch timestampType.unit { + case .seconds: + TimeInterval(timestamp) + case .milliseconds: + TimeInterval(timestamp) / 1_000 + case .microseconds: + TimeInterval(timestamp) / 1_000_000 + case .nanoseconds: + TimeInterval(timestamp) / 1_000_000_000 + } + values.append(Date(timeIntervalSince1970: timeInterval)) case ArrowType.ArrowBinary: values.append((array as! AsString).asString(i).utf8) case .complexInfo(.strct): diff --git a/Sources/SparkConnect/Row.swift b/Sources/SparkConnect/Row.swift index 249ccae..56418d2 100644 --- a/Sources/SparkConnect/Row.swift +++ b/Sources/SparkConnect/Row.swift @@ -71,6 +71,8 @@ public struct Row: Sendable, Equatable { return a == b } else if let a = x as? Decimal, let b = y as? Decimal { return a == b + } else if let a = x as? Date, let b = y as? Date { + return a == b } else if let a = x as? String, let b = y as? String { return a == b } else { diff --git a/Tests/SparkConnectTests/DataFrameTests.swift b/Tests/SparkConnectTests/DataFrameTests.swift index 2edd5f8..62b117a 100644 --- a/Tests/SparkConnectTests/DataFrameTests.swift +++ b/Tests/SparkConnectTests/DataFrameTests.swift @@ -932,6 +932,19 @@ struct DataFrameTests { #expect(try await df.collect() == expected) await spark.stop() } + + @Test + func timestamp() async throws { + let spark = try await SparkSession.builder.getOrCreate() + let df = try await spark.sql( + "SELECT TIMESTAMP '2025-05-01 16:23:40', TIMESTAMP '2025-05-01 16:23:40.123456'") + let expected = [ + Row( + Date(timeIntervalSince1970: 1746116620.0), Date(timeIntervalSince1970: 1746116620.123456)) + ] + #expect(try await df.collect() == expected) + await spark.stop() + } #endif @Test