diff --git a/Examples/spark-sql/Dockerfile b/Examples/spark-sql/Dockerfile
new file mode 100644
index 0000000..99c0db2
--- /dev/null
+++ b/Examples/spark-sql/Dockerfile
@@ -0,0 +1,46 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+FROM swift:6.1 AS builder
+
+WORKDIR /app
+
+COPY . .
+
+RUN swift build -c release
+
+FROM swift:6.1-slim
+
+ARG SPARK_UID=185
+
+LABEL org.opencontainers.image.authors="Apache Spark project"
+LABEL org.opencontainers.image.licenses="Apache-2.0"
+LABEL org.opencontainers.image.ref.name="Apache Spark Connect for Swift"
+
+ENV SPARK_SWIFT_HOME=/opt/spark-swift
+ENV SPARK_SWIFT_APP=SparkConnectSwiftSQLRepl
+
+WORKDIR $SPARK_SWIFT_HOME
+
+RUN groupadd --system --gid=$SPARK_UID spark && \
+    useradd --system --home-dir $SPARK_SWIFT_HOME --uid=$SPARK_UID --gid=spark spark && \
+    chown -R spark:spark $SPARK_SWIFT_HOME
+
+COPY --from=builder --chown=spark:spark /app/.build/*-unknown-linux-gnu/release/$SPARK_SWIFT_APP .
+
+USER spark
+
+ENTRYPOINT ["/bin/sh", "-c", "$SPARK_SWIFT_HOME/$SPARK_SWIFT_APP"]
diff --git a/Examples/spark-sql/Package.swift b/Examples/spark-sql/Package.swift
new file mode 100644
index 0000000..b5ed5ff
--- /dev/null
+++ b/Examples/spark-sql/Package.swift
@@ -0,0 +1,37 @@
+// swift-tools-version: 6.0
+//
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//
+
+import PackageDescription
+
+let package = Package(
+  name: "SparkConnectSwiftSQLRepl",
+  platforms: [
+    .macOS(.v15)
+  ],
+  dependencies: [
+    .package(url: "https://github.com/apache/spark-connect-swift.git", branch: "main")
+  ],
+  targets: [
+    .executableTarget(
+      name: "SparkConnectSwiftSQLRepl",
+      dependencies: [.product(name: "SparkConnect", package: "spark-connect-swift")]
+    )
+  ]
+)
diff --git a/Examples/spark-sql/README.md b/Examples/spark-sql/README.md
new file mode 100644
index 0000000..73bcf95
--- /dev/null
+++ b/Examples/spark-sql/README.md
@@ -0,0 +1,118 @@
+# A `Spark SQL REPL` Application with Apache Spark Connect Swift Client
+
+This is an example Swift application that shows how to develop a Spark SQL REPL (Read-Eval-Print Loop) with the Apache Spark Connect Swift Client library.
+
+## How to run
+
+Prepare a `Spark Connect Server` by running the Apache Spark Docker image.
+
+```
+docker run -it --rm -p 15002:15002 apache/spark:4.0.0-preview2 bash -c "/opt/spark/sbin/start-connect-server.sh --wait"
+```
+
+Build the application Docker image.
+
+```
+$ docker build -t apache/spark-connect-swift:spark-sql .
+$ docker images apache/spark-connect-swift:spark-sql
+REPOSITORY                   TAG         IMAGE ID       CREATED         SIZE
+apache/spark-connect-swift   spark-sql   265ddfec650d   7 seconds ago   390MB
+```
+
+Run the `spark-sql` Docker image.
+
+```
+$ docker run -it --rm -e SPARK_REMOTE=sc://host.docker.internal:15002 apache/spark-connect-swift:spark-sql
+Connected to Apache Spark 4.0.0-preview2 Server
+spark-sql (default)> SHOW DATABASES;
++---------+
+|namespace|
++---------+
+|  default|
++---------+
+
+Time taken: 30 ms
+spark-sql (default)> CREATE DATABASE db1;
+++
+||
+++
+++
+
+Time taken: 31 ms
+spark-sql (default)> USE db1;
+++
+||
+++
+++
+
+Time taken: 27 ms
+spark-sql (db1)> CREATE TABLE t1 AS SELECT * FROM RANGE(10);
+++
+||
+++
+++
+
+Time taken: 99 ms
+spark-sql (db1)> SELECT * FROM t1;
++---+
+| id|
++---+
+|  1|
+|  5|
+|  3|
+|  0|
+|  6|
+|  9|
+|  4|
+|  8|
+|  7|
+|  2|
++---+
+
+Time taken: 80 ms
+spark-sql (db1)> USE default;
+++
+||
+++
+++
+
+Time taken: 26 ms
+spark-sql (default)> DROP DATABASE db1 CASCADE;
+++
+||
+++
+++
+spark-sql (default)> exit;
+```
+
+Apache Spark 4 supports [SQL Pipe Syntax](https://dist.apache.org/repos/dist/dev/spark/v4.0.0-rc6-docs/_site/sql-pipe-syntax.html).
+
+```
+$ swift run
+...
+Build of product 'SparkConnectSwiftSQLRepl' complete! (2.33s)
+Connected to Apache Spark 4.0.0 Server
+spark-sql (default)>
+FROM ORC.`/opt/spark/examples/src/main/resources/users.orc`
+|> AGGREGATE COUNT(*) cnt
+   GROUP BY name
+|> ORDER BY cnt DESC, name ASC
+;
++------+---+
+|  name|cnt|
++------+---+
+|Alyssa|  1|
+|   Ben|  1|
++------+---+
+
+Time taken: 159 ms
+```
+
+Run from source code.
+
+```
+$ swift run
+...
+Connected to Apache Spark 4.0.0.9-apple-SNAPSHOT Server
+spark-sql (default)>
+```
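The REPL's `main.swift` is moved into this example by the rename below, so its contents are not shown in this diff. Purely for orientation, a minimal loop over the `SparkConnect` API might look roughly like the following sketch; the `SparkSession.builder.getOrCreate()`, `version`, `sql(_:)`, `show()`, and `stop()` calls are assumed from this repository's other examples, and the prompt handling is illustrative only, not the renamed file's actual code.

```swift
import Foundation
import SparkConnect

// Connect using the library defaults; SPARK_REMOTE (if set) selects the endpoint,
// which is why the Docker invocation above only passes that environment variable.
let spark = try await SparkSession.builder.getOrCreate()
print("Connected to Apache Spark \(await spark.version) Server")

while true {
  // Print a prompt and read one SQL statement per line.
  print("spark-sql> ", terminator: "")
  guard let line = readLine()?.trimmingCharacters(in: .whitespaces) else { break }
  if line.isEmpty { continue }
  if line.lowercased() == "exit;" { break }
  do {
    // Execute the statement and print the result as a table.
    try await spark.sql(line).show()
  } catch {
    print("Error: \(error)")
  }
}

await spark.stop()
```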
diff --git a/Sources/SparkSQLRepl/main.swift b/Examples/spark-sql/Sources/main.swift
similarity index 100%
rename from Sources/SparkSQLRepl/main.swift
rename to Examples/spark-sql/Sources/main.swift
diff --git a/Package.swift b/Package.swift
index b17181e..0a1ec16 100644
--- a/Package.swift
+++ b/Package.swift
@@ -52,10 +52,6 @@ let package = Package(
         .process("Documentation.docc")
       ]
     ),
-    .executableTarget(
-      name: "SparkSQLRepl",
-      dependencies: ["SparkConnect"]
-    ),
     .testTarget(
       name: "SparkConnectTests",
       dependencies: ["SparkConnect"],
diff --git a/README.md b/README.md
index 374ce01..6a8cd59 100644
--- a/README.md
+++ b/README.md
@@ -114,103 +114,7 @@ SELECT * FROM t
 +----+
 ```
 
-You can find more complete examples including Web Server and Streaming applications in the `Examples` directory.
+You can find more complete examples including `Spark SQL REPL`, `Web Server`, and `Streaming` applications in the [Examples](https://github.com/apache/spark-connect-swift/tree/main/Examples) directory.
 
-## How to use `Spark SQL REPL` via `Spark Connect for Swift`
+This library also supports the `SPARK_REMOTE` environment variable to specify the [Spark Connect connection string](https://spark.apache.org/docs/latest/spark-connect-overview.html#set-sparkremote-environment-variable) in order to provide more options.
 
-This project also provides `Spark SQL REPL`. You can run it directly from this repository.
-
-```bash
-$ swift run
-...
-Build of product 'SparkSQLRepl' complete! (2.33s)
-Connected to Apache Spark 4.0.0 Server
-spark-sql (default)> SHOW DATABASES;
-+---------+
-|namespace|
-+---------+
-|  default|
-+---------+
-
-Time taken: 30 ms
-spark-sql (default)> CREATE DATABASE db1;
-++
-||
-++
-++
-
-Time taken: 31 ms
-spark-sql (default)> USE db1;
-++
-||
-++
-++
-
-Time taken: 27 ms
-spark-sql (db1)> CREATE TABLE t1 AS SELECT * FROM RANGE(10);
-++
-||
-++
-++
-
-Time taken: 99 ms
-spark-sql (db1)> SELECT * FROM t1;
-+---+
-| id|
-+---+
-|  1|
-|  5|
-|  3|
-|  0|
-|  6|
-|  9|
-|  4|
-|  8|
-|  7|
-|  2|
-+---+
-
-Time taken: 80 ms
-spark-sql (db1)> USE default;
-++
-||
-++
-++
-
-Time taken: 26 ms
-spark-sql (default)> DROP DATABASE db1 CASCADE;
-++
-||
-++
-++
-spark-sql (default)> exit;
-```
-
-Apache Spark 4 supports [SQL Pipe Syntax](https://dist.apache.org/repos/dist/dev/spark/v4.0.0-rc6-docs/_site/sql-pipe-syntax.html).
-
-```
-$ swift run
-...
-Build of product 'SparkSQLRepl' complete! (2.33s)
-Connected to Apache Spark 4.0.0 Server
-spark-sql (default)>
-FROM ORC.`/opt/spark/examples/src/main/resources/users.orc`
-|> AGGREGATE COUNT(*) cnt
-   GROUP BY name
-|> ORDER BY cnt DESC, name ASC
-;
-+------+---+
-|  name|cnt|
-+------+---+
-|Alyssa|  1|
-|   Ben|  1|
-+------+---+
-
-Time taken: 159 ms
-```
-
-You can use `SPARK_REMOTE` to specify the [Spark Connect connection string](https://spark.apache.org/docs/latest/spark-connect-overview.html#set-sparkremote-environment-variable) in order to provide more options.
-
-```bash
-SPARK_REMOTE=sc://localhost swift run
-```
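As a companion to the `SPARK_REMOTE` note kept in the README above, the endpoint can also be chosen programmatically. This is a sketch under two assumptions that are not taken from this diff: that the builder exposes a `remote(_:)` option accepting a Spark Connect connection string (as other Spark Connect clients do), and that `sc://localhost:15002` is the server started earlier.

```swift
import Foundation
import SparkConnect

// Resolve the connection string: prefer SPARK_REMOTE, fall back to a local server.
// "sc://host:port" is the Spark Connect connection-string scheme.
let remote = ProcessInfo.processInfo.environment["SPARK_REMOTE"] ?? "sc://localhost:15002"

// Assumes a builder option equivalent to setting the SPARK_REMOTE environment variable.
let spark = try await SparkSession.builder.remote(remote).getOrCreate()
print("Connected to \(remote), Apache Spark \(await spark.version)")
await spark.stop()
```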
diff --git a/Sources/SparkConnect/Documentation.docc/Examples.md b/Sources/SparkConnect/Documentation.docc/Examples.md
index 2f87e70..7999151 100644
--- a/Sources/SparkConnect/Documentation.docc/Examples.md
+++ b/Sources/SparkConnect/Documentation.docc/Examples.md
@@ -37,6 +37,30 @@ docker run -it --rm -e SPARK_REMOTE=sc://host.docker.internal:15002 apache/spark
 swift run
 ```
 
+## Spark SQL REPL (Read-Eval-Print Loop) Example
+
+The Spark SQL REPL application example demonstrates interactive execution of ad-hoc Spark SQL queries with Apache Spark Connect, including:
+- Connecting to a Spark server
+- Receiving ad-hoc Spark SQL queries from users
+- Showing the SQL results interactively
+
+### Key Features
+- Spark SQL execution for table operations
+- User interactions
+
+### How to Run
+
+Build and run the application:
+
+```bash
+# Using Docker
+docker build -t apache/spark-connect-swift:spark-sql .
+docker run -it --rm -e SPARK_REMOTE=sc://host.docker.internal:15002 apache/spark-connect-swift:spark-sql
+
+# From source code
+swift run
+```
+
 ## Pi Calculation Example
 
 The Pi calculation example shows how to use Spark Connect Swift for computational tasks by calculating an approximation of π (pi) using the Monte Carlo method.
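The trailing context above references the Pi example's Monte Carlo method: sample random points in the unit square, count the fraction that falls inside the quarter circle, and multiply by four. A plain-Swift sketch of that idea is shown below for reference; it is not the Pi example's actual Spark-based implementation.

```swift
// Monte Carlo estimate of pi: the probability that a uniform random point in the
// unit square lands inside the quarter circle of radius 1 is pi/4.
func estimatePi(samples n: Int) -> Double {
  var inside = 0
  for _ in 0..<n {
    let x = Double.random(in: 0..<1)
    let y = Double.random(in: 0..<1)
    if x * x + y * y <= 1.0 { inside += 1 }
  }
  return 4.0 * Double(inside) / Double(n)
}

print(estimatePi(samples: 1_000_000))  // typically prints a value near 3.14
```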