[SPARK-51508] Support collect(): [[String?]] for DataFrame #17

Closed · 2 commits
Sources/SparkConnect/DataFrame.swift (24 additions, 5 deletions)
@@ -58,7 +58,7 @@ public actor DataFrame: Sendable {

/// Add `Apache Arrow`'s `RecordBatch`s to the internal array.
/// - Parameter batches: An array of ``RecordBatch``.
-  private func addBathes(_ batches: [RecordBatch]) {
+  private func addBatches(_ batches: [RecordBatch]) {
Member Author (dongjoon-hyun): This is a typo fix.

self.batches.append(contentsOf: batches)
}

@@ -153,16 +153,35 @@ public actor DataFrame: Sendable {
let arrowResult = ArrowReader.makeArrowReaderResult()
_ = reader.fromMessage(schema, dataBody: Data(), result: arrowResult)
_ = reader.fromMessage(dataHeader, dataBody: dataBody, result: arrowResult)
-          await self.addBathes(arrowResult.batches)
+          await self.addBatches(arrowResult.batches)
}
}
}
}
}

-  /// This is designed not to support this feature in order to simplify the Swift client.
-  public func collect() async throws {
-    throw SparkConnectError.UnsupportedOperationException
+  /// Execute the plan and return the result as ``[[String?]]``.
+  /// - Returns: ``[[String?]]``
+  public func collect() async throws -> [[String?]] {
+    try await execute()

+    var result: [[String?]] = []
+    for batch in self.batches {
+      for i in 0..<batch.length {
+        var values: [String?] = []
+        for column in batch.columns {
+          let str = column.array as! AsString
Comment on lines +172 to +173

Member: For a DataFrame, I would expect collect() to return an array of Row, but this collect() returns strings. Is this just the initial implementation, with Row to come later?

Member Author (dongjoon-hyun): Yes, correct. The Row implementation is on the way~

Member Author (dongjoon-hyun): For the Scala client, we also support Array[java.lang.Long], like the following:

$ bin/spark-shell --remote sc://localhost:15002
scala> spark.range(1).collect()
res0: Array[java.lang.Long] = Array(0L)
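For comparison, the same call through this Swift client currently yields strings rather than longs. A hypothetical session against the patched client (server address assumed):

    let rows: [[String?]] = try await spark.range(1).collect()
    // rows == [["0"]]  -- values stay String? until Row lands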

+          if column.data.isNull(i) {
+            values.append(nil)
+          } else {
+            values.append(str.asString(i))
+          }
+        }
+        result.append(values)
+      }
+    }
+
+    return result
+  }
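A minimal end-to-end sketch of the new API, assuming a reachable Spark Connect server (hypothetical session; the expected values mirror the collect() test below):

    let spark = try await SparkSession.builder.getOrCreate()
    // SQL NULLs surface as nil in the inner array.
    let rows: [[String?]] = try await spark.sql("SELECT 1, NULL, 'abc'").collect()
    // rows == [["1", nil, "abc"]]
    await spark.stop()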

/// Execute the plan and show the result.
Sources/SparkConnect/SparkConnectClient.swift (2 additions)
@@ -275,9 +275,11 @@ public actor SparkConnectClient {
let expressions: [Spark_Connect_Expression.SortOrder] = cols.map {
var expression = Spark_Connect_Expression.SortOrder()
expression.child.exprType = .unresolvedAttribute($0.toUnresolvedAttribute)
+      expression.direction = .ascending
return expression
}
sort.order = expressions
+    sort.isGlobal = true
Member Author (dongjoon-hyun, Mar 14, 2025): I piggyback this fix while improving test coverage by checking the result via the collect() API.

var relation = Relation()
relation.sort = sort
var plan = Plan()
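A sketch of what these two additions change in observable behavior, using the public API (hedged; the expected ordering matches the updated tests below):

    // With direction = .ascending and isGlobal = true, sort("id") performs an
    // ascending sort across all partitions instead of leaving both fields at
    // their protobuf defaults.
    let rows = try await spark.range(10, 0, -1).sort("id").collect()
    // rows == [["1"], ["2"], ..., ["10"]]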
Sources/SparkConnect/SparkSession.swift (2 additions, 4 deletions)
@@ -45,12 +45,10 @@ public actor SparkSession {
/// - userID: an optional user ID. If absent, `SPARK_USER` environment or ``ProcessInfo.processInfo.userName`` is used.
init(_ connection: String, _ userID: String? = nil) {
let processInfo = ProcessInfo.processInfo
-    #if os(iOS) || os(watchOS) || os(tvOS)
-      let userName = processInfo.environment["SPARK_USER"] ?? ""
-    #elseif os(macOS) || os(Linux)
+    #if os(macOS) || os(Linux)
Member Author (dongjoon-hyun): I simplified the implementation in a new way to cover os(visionOS) too.

let userName = processInfo.environment["SPARK_USER"] ?? processInfo.userName
#else
-      assert(false, "Unsupported platform")
+      let userName = processInfo.environment["SPARK_USER"] ?? ""
#endif
self.client = SparkConnectClient(remote: connection, user: userID ?? userName)
self.conf = RuntimeConf(self.client)
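A standalone sketch of the user-name resolution order after this change (illustrative only; resolveUserName is a hypothetical helper, not part of the client):

    import Foundation

    func resolveUserName(_ processInfo: ProcessInfo = .processInfo) -> String {
      #if os(macOS) || os(Linux)
        // SPARK_USER, when set, takes precedence over the OS user name.
        return processInfo.environment["SPARK_USER"] ?? processInfo.userName
      #else
        // iOS/watchOS/tvOS/visionOS: ProcessInfo has no userName, so an empty
        // string is the fallback instead of asserting.
        return processInfo.environment["SPARK_USER"] ?? ""
      #endif
    }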
Tests/SparkConnectTests/DataFrameTests.swift (17 additions, 2 deletions)
@@ -125,19 +125,23 @@ struct DataFrameTests {
await spark.stop()
}

+  #if !os(Linux)

Member Author (dongjoon-hyun):
  • Like show(), collect() currently has a binary compatibility issue on os(Linux).
  • On macOS, all tests pass.

@Test
func sort() async throws {
let spark = try await SparkSession.builder.getOrCreate()
-    #expect(try await spark.range(10).sort("id").count() == 10)
+    let expected = (1...10).map{ [String($0)] }
+    #expect(try await spark.range(10, 0, -1).sort("id").collect() == expected)
await spark.stop()
}

@Test
func orderBy() async throws {
let spark = try await SparkSession.builder.getOrCreate()
-    #expect(try await spark.range(10).orderBy("id").count() == 10)
+    let expected = (1...10).map{ [String($0)] }
+    #expect(try await spark.range(10, 0, -1).orderBy("id").collect() == expected)
await spark.stop()
}
+  #endif

@Test
func table() async throws {
@@ -153,6 +157,17 @@
}

#if !os(Linux)
+  @Test
+  func collect() async throws {
+    let spark = try await SparkSession.builder.getOrCreate()
+    #expect(try await spark.range(0).collect().isEmpty)
+    #expect(
+      try await spark.sql(
+        "SELECT * FROM VALUES (1, true, 'abc'), (null, null, null), (3, false, 'def')"
+      ).collect() == [["1", "true", "abc"], [nil, nil, nil], ["3", "false", "def"]])
+    await spark.stop()
+  }

@Test
func show() async throws {
let spark = try await SparkSession.builder.getOrCreate()
Tests/SparkConnectTests/SparkSessionTests.swift (4 additions)
@@ -41,7 +41,11 @@ struct SparkSessionTests {

@Test func userContext() async throws {
let spark = try await SparkSession.builder.getOrCreate()
+    #if os(macOS) || os(Linux)
let defaultUserContext = ProcessInfo.processInfo.userName.toUserContext
+    #else
+      let defaultUserContext = "".toUserContext
+    #endif
#expect(await spark.client.userContext == defaultUserContext)
await spark.stop()
}