apache · dongjoon-hyun · Apr 16, 2025 · Apr 16, 2025 · viirya · Apr 16, 2025
diff --git a/Sources/SparkConnect/Catalog.swift b/Sources/SparkConnect/Catalog.swift
@@ -100,7 +100,7 @@ public actor Catalog: Sendable {
       catalog.catType = .currentCatalog(Spark_Connect_CurrentCatalog())
       return catalog
     })
-    return try await df.collect()[0][0]!
+    return try await df.collect()[0][0] as! String
   }
 
   /// Sets the current default catalog in this session.
@@ -130,7 +130,7 @@ public actor Catalog: Sendable {
       return catalog
     })
     return try await df.collect().map {
-      CatalogMetadata(name: $0[0]!, description: $0[1])
+      try CatalogMetadata(name: $0[0] as! String, description: $0[1] as? String)
     }
   }
 
@@ -142,7 +142,7 @@ public actor Catalog: Sendable {
       catalog.catType = .currentDatabase(Spark_Connect_CurrentDatabase())
       return catalog
     })
-    return try await df.collect()[0][0]!
+    return try await df.collect()[0][0] as! String
   }
 
   /// Sets the current default database in this session.
@@ -173,7 +173,7 @@ public actor Catalog: Sendable {
       return catalog
     })
     return try await df.collect().map {
-      Database(name: $0[0]!, catalog: $0[1], description: $0[2], locationUri: $0[3]!)
+      try Database(name: $0[0] as! String, catalog: $0[1] as? String, description: $0[2] as? String, locationUri: $0[3] as! String)
     }
   }
 
@@ -189,7 +189,7 @@ public actor Catalog: Sendable {
       return catalog
     })
     return try await df.collect().map {
-      Database(name: $0[0]!, catalog: $0[1], description: $0[2], locationUri: $0[3]!)
+      try Database(name: $0[0] as! String, catalog: $0[1] as? String, description: $0[2] as? String, locationUri: $0[3] as! String)
     }.first!
   }
 

diff --git a/Sources/SparkConnect/DataFrame.swift b/Sources/SparkConnect/DataFrame.swift
@@ -197,15 +197,15 @@ public actor DataFrame: Sendable {
     }
   }
 
-  /// Execute the plan and return the result as ``[[String?]]``.
-  /// - Returns: ``[[String?]]``
-  public func collect() async throws -> [[String?]] {
+  /// Execute the plan and return the result as ``[Row]``.
+  /// - Returns: ``[Row]``
+  public func collect() async throws -> [Row] {
     try await execute()
 
-    var result: [[String?]] = []
+    var result: [Row] = []
     for batch in self.batches {
       for i in 0..<batch.length {
-        var values: [String?] = []
+        var values: [Sendable?] = []
         for column in batch.columns {
           let str = column.array as! AsString
           if column.data.isNull(i) {
@@ -217,7 +217,7 @@ public actor DataFrame: Sendable {
             values.append(str.asString(i))
           }
         }
-        result.append(values)
+        result.append(Row(valueArray: values))
       }
     }
 
@@ -377,15 +377,15 @@ public actor DataFrame: Sendable {
 
   /// Returns the first `n` rows.
   /// - Parameter n: The number of rows. (default: 1)
-  /// - Returns: ``[[String?]]``
-  public func head(_ n: Int32 = 1) async throws -> [[String?]] {
+  /// - Returns: ``[Row]``
+  public func head(_ n: Int32 = 1) async throws -> [Row] {
     return try await limit(n).collect()
   }
 
   /// Returns the last `n` rows.
   /// - Parameter n: The number of rows.
-  /// - Returns: ``[[String?]]``
-  public func tail(_ n: Int32) async throws -> [[String?]] {
+  /// - Returns: ``[Row]``
+  public func tail(_ n: Int32) async throws -> [Row] {
     let lastN = DataFrame(spark:spark, plan: SparkConnectClient.getTail(self.plan.root, n))
     return try await lastN.collect()
   }

diff --git a/Sources/SparkConnect/Row.swift b/Sources/SparkConnect/Row.swift
@@ -0,0 +1,90 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//
+import Foundation
+
+/// A positional collection of values, as produced by `DataFrame.collect()`.
+///
+/// Every element is stored as `Sendable?`; a `nil` element stands for a null value.
+public struct Row: Sendable, Equatable {
+  let values: [Sendable?]
+
+  /// Creates a row from a variadic list of values.
+  /// - Parameter values: The values of the row, in positional order.
+  public init(_ values: Sendable?...) {
+    self.values = values
+  }
+
+  /// Creates a row from an existing array of values.
+  /// - Parameter valueArray: The values of the row, in positional order.
+  public init(valueArray: [Sendable?]) {
+    self.values = valueArray
+  }
+
+  /// A row that contains no values.
+  public static var empty: Row {
+    return Row()
+  }
+
+  /// The number of values in this row. Same as ``length``.
+  public var size: Int { return length }
+
+  /// The number of values in this row.
+  public var length: Int { return values.count }
+
+  /// Returns the value at `index`. Equivalent to ``get(_:)``.
+  ///
+  /// Declared `public` so that it is usable wherever ``get(_:)`` is.
+  /// - Throws: `SparkConnectError.InvalidArgumentException` if `index` is out of range.
+  public subscript(index: Int) -> Sendable {
+    get throws {
+      return try get(index)
+    }
+  }
+
+  /// Returns the value at position `i`.
+  ///
+  /// The stored element is `Sendable?` and is returned as-is inside the `Sendable` result,
+  /// so callers downcast it (for example `as? String`) to obtain a concrete value.
+  /// - Parameter i: A zero-based position in the row.
+  /// - Throws: `SparkConnectError.InvalidArgumentException` if `i` is negative or
+  ///   not less than ``length``.
+  public func get(_ i: Int) throws -> Sendable {
+    guard i >= 0 && i < length else {
+      throw SparkConnectError.InvalidArgumentException
+    }
+    return values[i]
+  }
+
+  /// Compares two rows element by element.
+  ///
+  /// Two elements are equal when both are `nil`, or when both downcast to the same one of
+  /// `Bool`, `Int`, `Int8`, `Int16`, `Int32`, `Int64`, `Float`, `Double` or `String` and
+  /// those values are equal. Any other pairing compares as unequal, including two
+  /// elements of a type outside that list.
+  public static func == (lhs: Row, rhs: Row) -> Bool {
+    // Cheap early exit; `elementsEqual` would also report a length mismatch.
+    if lhs.values.count != rhs.values.count {
+      return false
+    }
+    return lhs.values.elementsEqual(rhs.values) { (x, y) in
+      if x == nil && y == nil {
+        return true
+      } else if let a = x as? Bool, let b = y as? Bool {
+        return a == b
+      } else if let a = x as? Int, let b = y as? Int {
+        return a == b
+      } else if let a = x as? Int8, let b = y as? Int8 {
+        return a == b
+      } else if let a = x as? Int16, let b = y as? Int16 {
+        return a == b
+      } else if let a = x as? Int32, let b = y as? Int32 {
+        return a == b
+      } else if let a = x as? Int64, let b = y as? Int64 {
+        return a == b
+      } else if let a = x as? Float, let b = y as? Float {
+        return a == b
+      } else if let a = x as? Double, let b = y as? Double {
+        return a == b
+      } else if let a = x as? String, let b = y as? String {
+        return a == b
+      } else {
+        return false
+      }
+    }
+  }
+
+  /// Renders the row as `[v0,v1,...]`, writing `null` for `nil` elements.
+  public func toString() -> String {
+    return "[\(values.map { "\($0 ?? "null")" }.joined(separator: ","))]"
+  }
+}
+
+// Placeholder extension: it currently adds no members to `Row`.
+extension Row {
+}
diff --git a/Tests/SparkConnectTests/DataFrameTests.swift b/Tests/SparkConnectTests/DataFrameTests.swift
@@ -249,15 +249,15 @@ struct DataFrameTests {
   @Test
   func sort() async throws {
     let spark = try await SparkSession.builder.getOrCreate()
-    let expected = (1...10).map{ [String($0)] }
+    let expected = Array((1...10).map{ Row(String($0)) })
     #expect(try await spark.range(10, 0, -1).sort("id").collect() == expected)
     await spark.stop()
   }
 
   @Test
   func orderBy() async throws {
     let spark = try await SparkSession.builder.getOrCreate()
-    let expected = (1...10).map{ [String($0)] }
+    let expected = Array((1...10).map{ Row(String($0)) })
     #expect(try await spark.range(10, 0, -1).orderBy("id").collect() == expected)
     await spark.stop()
   }
@@ -284,28 +284,28 @@ struct DataFrameTests {
     #expect(
       try await spark.sql(
         "SELECT * FROM VALUES (1, true, 'abc'), (null, null, null), (3, false, 'def')"
-      ).collect() == [["1", "true", "abc"], [nil, nil, nil], ["3", "false", "def"]])
+      ).collect() == [Row("1", "true", "abc"), Row(nil, nil, nil), Row("3", "false", "def")])
     await spark.stop()
   }
 
   @Test
   func head() async throws {
     let spark = try await SparkSession.builder.getOrCreate()
     #expect(try await spark.range(0).head().isEmpty)
-    #expect(try await spark.range(2).sort("id").head() == [["0"]])
-    #expect(try await spark.range(2).sort("id").head(1) == [["0"]])
-    #expect(try await spark.range(2).sort("id").head(2) == [["0"], ["1"]])
-    #expect(try await spark.range(2).sort("id").head(3) == [["0"], ["1"]])
+    #expect(try await spark.range(2).sort("id").head() == [Row("0")])
+    #expect(try await spark.range(2).sort("id").head(1) == [Row("0")])
+    #expect(try await spark.range(2).sort("id").head(2) == [Row("0"), Row("1")])
+    #expect(try await spark.range(2).sort("id").head(3) == [Row("0"), Row("1")])
     await spark.stop()
   }
 
   @Test
   func tail() async throws {
     let spark = try await SparkSession.builder.getOrCreate()
     #expect(try await spark.range(0).tail(1).isEmpty)
-    #expect(try await spark.range(2).sort("id").tail(1) == [["1"]])
-    #expect(try await spark.range(2).sort("id").tail(2) == [["0"], ["1"]])
-    #expect(try await spark.range(2).sort("id").tail(3) == [["0"], ["1"]])
+    #expect(try await spark.range(2).sort("id").tail(1) == [Row("1")])
+    #expect(try await spark.range(2).sort("id").tail(2) == [Row("0"), Row("1")])
+    #expect(try await spark.range(2).sort("id").tail(3) == [Row("0"), Row("1")])
     await spark.stop()
   }
 

diff --git a/Tests/SparkConnectTests/Resources/queries/binary.sql.answer b/Tests/SparkConnectTests/Resources/queries/binary.sql.answer
@@ -0,0 +1 @@
+[[61 62 63]]
diff --git a/Tests/SparkConnectTests/Resources/queries/binary.sql.json b/Tests/SparkConnectTests/Resources/queries/binary.sql.json
diff --git a/Tests/SparkConnectTests/Resources/queries/cache.sql.answer b/Tests/SparkConnectTests/Resources/queries/cache.sql.answer
@@ -0,0 +1 @@
+
diff --git a/Tests/SparkConnectTests/Resources/queries/cache.sql.json b/Tests/SparkConnectTests/Resources/queries/cache.sql.json
diff --git a/Tests/SparkConnectTests/Resources/queries/clear_cache.sql.answer b/Tests/SparkConnectTests/Resources/queries/clear_cache.sql.answer
@@ -0,0 +1 @@
+
diff --git a/Tests/SparkConnectTests/Resources/queries/clear_cache.sql.json b/Tests/SparkConnectTests/Resources/queries/clear_cache.sql.json
diff --git a/Tests/SparkConnectTests/Resources/queries/date.sql.answer b/Tests/SparkConnectTests/Resources/queries/date.sql.answer
@@ -0,0 +1 @@
+[2025-03-15 00:00:00 +0000]
diff --git a/Tests/SparkConnectTests/Resources/queries/date.sql.json b/Tests/SparkConnectTests/Resources/queries/date.sql.json
diff --git a/Tests/SparkConnectTests/Resources/queries/describe_database.sql.answer b/Tests/SparkConnectTests/Resources/queries/describe_database.sql.answer
@@ -0,0 +1,5 @@
+[Catalog Name,spark_catalog]
+[Namespace Name,default]
+[Comment,default database]
+[Location,*]
+[Owner,*]
 /// Normalizes environment-dependent parts of `str` so it can be compared with a stored answer.
 ///
 /// Applies `removeLocation`, then `removeID`, then `removeOwner`.
 /// - Parameter str: The raw text to normalize.
 /// - Returns: `str` with locations, IDs and owners replaced by fixed placeholders.
 private func cleanUp(_ str: String) -> String {
   return removeOwner(removeID(removeLocation(str)))
 }
 
 /// Replaces every match of `regexPlanId` with `plan_id=` and every match of `regexID` with `#`.
 /// - Parameter str: The text to process.
 /// - Returns: The text with both kinds of matches replaced.
 private func removeID(_ str: String) -> String {
   return str.replacing(regexPlanId, with: "plan_id=").replacing(regexID, with: "#")
 }
 
 /// Replaces every match of `regexLocation` with `*`.
 /// - Parameter str: The text to process.
 /// - Returns: The text with each match replaced.
 private func removeLocation(_ str: String) -> String {
   return str.replacing(regexLocation, with: "*")
 }
 
 /// Replaces every match of `regexOwner` with `*`.
 /// - Parameter str: The text to process.
 /// - Returns: The text with each match replaced.
 private func removeOwner(_ str: String) -> String {
   return str.replacing(regexOwner, with: "*")
 }
diff --git a/Tests/SparkConnectTests/Resources/queries/describe_database.sql.json b/Tests/SparkConnectTests/Resources/queries/describe_database.sql.json
diff --git a/Tests/SparkConnectTests/Resources/queries/describe_function.sql.answer b/Tests/SparkConnectTests/Resources/queries/describe_function.sql.answer
@@ -0,0 +1,3 @@
+[Function: abs]
+[Class: org.apache.spark.sql.catalyst.expressions.Abs]
+[Usage: abs(expr) - Returns the absolute value of the numeric or interval value.]
diff --git a/Tests/SparkConnectTests/Resources/queries/describe_function.sql.json b/Tests/SparkConnectTests/Resources/queries/describe_function.sql.json
diff --git a/Tests/SparkConnectTests/Resources/queries/describe_query.sql.answer b/Tests/SparkConnectTests/Resources/queries/describe_query.sql.answer
@@ -0,0 +1,3 @@
+[id,int,null]
+[name,string,null]
+[salary,double,null]
diff --git a/Tests/SparkConnectTests/Resources/queries/describe_query.sql.json b/Tests/SparkConnectTests/Resources/queries/describe_query.sql.json
diff --git a/Tests/SparkConnectTests/Resources/queries/describe_table.sql.answer b/Tests/SparkConnectTests/Resources/queries/describe_table.sql.answer
@@ -0,0 +1 @@
+[col,int,null]
diff --git a/Tests/SparkConnectTests/Resources/queries/describe_table.sql.json b/Tests/SparkConnectTests/Resources/queries/describe_table.sql.json
diff --git a/Tests/SparkConnectTests/Resources/queries/explain.sql.answer b/Tests/SparkConnectTests/Resources/queries/explain.sql.answer
@@ -0,0 +1,22 @@
+[== Parsed Logical Plan ==
+'Aggregate ['k], ['k, unresolvedalias('sum('v))]
++- SubqueryAlias t
+   +- LocalRelation [k#, v#]
+
+== Analyzed Logical Plan ==
+k: int, sum(v): bigint
+Aggregate [k#], [k#, sum(v#) AS sum(v)#]
++- SubqueryAlias t
+   +- LocalRelation [k#, v#]
+
+== Optimized Logical Plan ==
+Aggregate [k#], [k#, sum(v#) AS sum(v)#]
++- LocalRelation [k#, v#]
+
+== Physical Plan ==
+AdaptiveSparkPlan isFinalPlan=false
++- HashAggregate(keys=[k#], functions=[sum(v#)], output=[k#, sum(v)#])
+   +- Exchange hashpartitioning(k#, 200), ENSURE_REQUIREMENTS, [plan_id=]
+      +- HashAggregate(keys=[k#], functions=[partial_sum(v#)], output=[k#, sum#])
+         +- LocalTableScan [k#, v#]
+]
diff --git a/Tests/SparkConnectTests/Resources/queries/explain.sql.json b/Tests/SparkConnectTests/Resources/queries/explain.sql.json
diff --git a/Tests/SparkConnectTests/Resources/queries/floating.sql.answer b/Tests/SparkConnectTests/Resources/queries/floating.sql.answer
@@ -0,0 +1 @@
+[1.0,-2.0,3.0,-4.0,inf,nan,inf,nan]
diff --git a/Tests/SparkConnectTests/Resources/queries/floating.sql.json b/Tests/SparkConnectTests/Resources/queries/floating.sql.json
diff --git a/Tests/SparkConnectTests/Resources/queries/integral.sql.answer b/Tests/SparkConnectTests/Resources/queries/integral.sql.answer
@@ -0,0 +1 @@
+[127,-128,32767,-32768,2147483647,-2147483648,9223372036854775807,-9223372036854775808]
diff --git a/Tests/SparkConnectTests/Resources/queries/integral.sql.json b/Tests/SparkConnectTests/Resources/queries/integral.sql.json
diff --git a/Tests/SparkConnectTests/Resources/queries/pipesyntax.sql.answer b/Tests/SparkConnectTests/Resources/queries/pipesyntax.sql.answer
@@ -0,0 +1,2 @@
+[0,0]
+[1,2]
diff --git a/Tests/SparkConnectTests/Resources/queries/pipesyntax.sql.json b/Tests/SparkConnectTests/Resources/queries/pipesyntax.sql.json
diff --git a/Tests/SparkConnectTests/Resources/queries/select.sql.answer b/Tests/SparkConnectTests/Resources/queries/select.sql.answer
@@ -0,0 +1 @@
+[1]
diff --git a/Tests/SparkConnectTests/Resources/queries/select.sql.json b/Tests/SparkConnectTests/Resources/queries/select.sql.json
diff --git a/Tests/SparkConnectTests/Resources/queries/show_databases.sql.answer b/Tests/SparkConnectTests/Resources/queries/show_databases.sql.answer
@@ -0,0 +1 @@
+[default]
diff --git a/Tests/SparkConnectTests/Resources/queries/show_databases.sql.json b/Tests/SparkConnectTests/Resources/queries/show_databases.sql.json
diff --git a/Tests/SparkConnectTests/Resources/queries/show_tables.sql.answer b/Tests/SparkConnectTests/Resources/queries/show_tables.sql.answer
@@ -0,0 +1 @@
+[,testcache,true]
diff --git a/Tests/SparkConnectTests/Resources/queries/show_tables.sql.json b/Tests/SparkConnectTests/Resources/queries/show_tables.sql.json
diff --git a/Tests/SparkConnectTests/Resources/queries/string.sql.answer b/Tests/SparkConnectTests/Resources/queries/string.sql.answer
@@ -0,0 +1 @@
+[abc,def]
diff --git a/Tests/SparkConnectTests/Resources/queries/string.sql.json b/Tests/SparkConnectTests/Resources/queries/string.sql.json
diff --git a/Tests/SparkConnectTests/Resources/queries/struct.sql.answer b/Tests/SparkConnectTests/Resources/queries/struct.sql.answer
@@ -0,0 +1 @@
+[{1},{2,{3}}]
diff --git a/Tests/SparkConnectTests/Resources/queries/struct.sql.json b/Tests/SparkConnectTests/Resources/queries/struct.sql.json
diff --git a/Tests/SparkConnectTests/Resources/queries/uncache.sql.answer b/Tests/SparkConnectTests/Resources/queries/uncache.sql.answer
@@ -0,0 +1 @@
+
diff --git a/Tests/SparkConnectTests/Resources/queries/uncache.sql.json b/Tests/SparkConnectTests/Resources/queries/uncache.sql.json
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		[127,-128,32767,-32768,2147483647,-2147483648,9223372036854775807,-9223372036854775808]