# Script inspired by: https://github.com/jupyter/docker-stacks/blob/master/pyspark-notebook/Dockerfile#L18
# https://github.com/apache/incubator-toree/blob/master/Dockerfile
+
+ # TODO: Add additional spark configuration:
+ # https://spark.apache.org/docs/latest/configuration.html
+ # TODO: start spark master? (see the sketch below)
+ # https://medium.com/@marcovillarreal_40011/creating-a-spark-standalone-cluster-with-docker-and-docker-compose-ba9d743a157f
+ # ENV SPARK_MASTER_PORT 7077
+ # ENV SPARK_MASTER_WEBUI_PORT 8080
+ # ENV SPARK_WORKER_WEBUI_PORT 8081
+ # ENV SPARK_MASTER_LOG /spark/logs
+ # ENV SPARK_WORKER_LOG /spark/logs
+ # export SPARK_MASTER_HOST=`hostname`
+ # SPARK_WORKER_CORES=1
+ # SPARK_WORKER_MEMORY=1G
+ # SPARK_DRIVER_MEMORY=128m
+ # SPARK_EXECUTOR_MEMORY=256m
+
+ # TODO: configure spark ui to be proxied with base path:
+ # https://stackoverflow.com/questions/45971127/wrong-css-location-of-spark-application-ui
+ # https://github.com/jupyterhub/jupyter-server-proxy/issues/57
+ # https://github.com/yuvipanda/jupyter-sparkui-proxy/blob/master/jupyter_sparkui_proxy/__init__.py
+
+
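# Sketch only (not executed by this installer): one way the variables listed in the
# TODO block above could be wired up to start a local standalone master and a single
# worker, using the helper scripts shipped with the Spark distribution under
# $SPARK_HOME/sbin (script names as of Spark 3.0.x; ports and resource values are
# just the example values from above):
#
#   export SPARK_MASTER_HOST=$(hostname)
#   export SPARK_MASTER_PORT=7077
#   export SPARK_MASTER_WEBUI_PORT=8080
#   export SPARK_WORKER_CORES=1
#   export SPARK_WORKER_MEMORY=1G
#   $SPARK_HOME/sbin/start-master.sh
#   $SPARK_HOME/sbin/start-slave.sh spark://$SPARK_MASTER_HOST:$SPARK_MASTER_PORT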
# Install scala 2.12
if [[ ! $(scala -version 2>&1) =~ "version 2.12" ]]; then
# Updating to Scala 2.12 is required for Spark
- SCALA_VERSION=2.12.12
- echo "Updating to Scala $SCALA_VERSION. Please wait..."
- apt-get remove scala-library scala
- apt-get autoremove
- wget -q https://downloads.lightbend.com/scala/$SCALA_VERSION/scala-$SCALA_VERSION.deb -O ./scala.deb
- dpkg -i scala.deb
- rm scala.deb
- apt-get update
- apt-get install scala
+ echo "Scala 2.12 is not installed. You should consider running the scala-utils.sh tool installer before continuing."
+ sleep 10
else
echo "Scala 2.12 already installed."
fi
@@ -38,7 +53,8 @@ if [ ! -d "$SPARK_HOME" ]; then
cd $RESOURCES_PATH
SPARK_VERSION="3.0.1"
HADOOP_VERSION="3.2"
- wget https://mirror.checkdomain.de/apache/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop$HADOOP_VERSION.tgz -O ./spark.tar.gz
+ echo "Downloading. Please wait..."
+ wget -q https://mirror.checkdomain.de/apache/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop$HADOOP_VERSION.tgz -O ./spark.tar.gz
tar xzf spark.tar.gz
mv spark-$SPARK_VERSION-bin-hadoop$HADOOP_VERSION/ $SPARK_HOME
rm spark.tar.gz
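# Sketch only (assumes SPARK_HOME is already exported, as the surrounding script
# does): a quick sanity check of the unpacked distribution.
#
#   export PATH="$SPARK_HOME/bin:$PATH"
#   spark-submit --version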
@@ -55,12 +71,12 @@ if [ ! -d "$SPARK_HOME" ]; then
pip install --no-cache-dir sparkmagic
jupyter serverextension enable --py sparkmagic

- # Install sparkmonitor: https://github.com/krishnan-r/sparkmonitor
- pip install --no-cache-dir sparkmonitor
- jupyter nbextension install sparkmonitor --py --sys-prefix --symlink
- jupyter nbextension enable sparkmonitor --py --sys-prefix
- jupyter serverextension enable --py --sys-prefix sparkmonitor
- ipython profile create && echo "c.InteractiveShellApp.extensions.append('sparkmonitor.kernelextension')" >> $(ipython profile locate default)/ipython_kernel_config.py
+ # TODO: does not work right now. Install sparkmonitor: https://github.com/krishnan-r/sparkmonitor
+ # pip install --no-cache-dir sparkmonitor
+ # jupyter nbextension install sparkmonitor --py --sys-prefix --symlink
+ # jupyter nbextension enable sparkmonitor --py --sys-prefix
+ # jupyter serverextension enable --py --sys-prefix sparkmonitor
+ # ipython profile create && echo "c.InteractiveShellApp.extensions.append('sparkmonitor.kernelextension')" >> $(ipython profile locate default)/ipython_kernel_config.py

# Deprecated: jupyter-spark: https://github.com/mozilla/jupyter-spark
# jupyter serverextension enable --py jupyter_spark && \
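# Sketch only (not executed here): sparkmagic talks to Spark through a Livy server,
# so it usually needs a config file. Key names follow sparkmagic's
# example_config.json; the Livy URL below is an assumption and must point at a real
# Livy endpoint.
#
#   mkdir -p ~/.sparkmagic
#   cat > ~/.sparkmagic/config.json <<'EOF'
#   {
#     "kernel_python_credentials": {
#       "username": "",
#       "password": "",
#       "url": "http://localhost:8998",
#       "auth": "None"
#     }
#   }
#   EOF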
@@ -85,8 +101,10 @@ pip install --no-cache-dir pyspark findspark pyarrow spylon-kernel
if [[ ! $(jupyter kernelspec list) =~ "toree" ]]; then
echo "Installing Toree Kernel for Jupyter. Please wait..."
TOREE_VERSION=0.5.0
- pip install --no-cache-dir https://dist.apache.org/repos/dist/dev/incubator/toree/$TOREE_VERSION-incubating-rc1/toree-pip/toree-$TOREE_VERSION.tar.gz
- jupyter toree install --sys-prefix --spark_home=$SPARK_HOME
+ echo "The Toree kernel does not seem to work with the installed Spark and Scala versions."
+ # TODO: Fix installation
+ # pip install --no-cache-dir https://dist.apache.org/repos/dist/dev/incubator/toree/$TOREE_VERSION-incubating-rc1/toree-pip/toree-$TOREE_VERSION.tar.gz
+ # jupyter toree install --sys-prefix --spark_home=$SPARK_HOME
else
echo "Toree Kernel for Jupyter is already installed."
fi
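# Sketch only: a smoke test for the finished installation using the SparkPi example
# bundled with the distribution (the jar name pattern is an assumption based on the
# default Spark 3.0.1 layout).
#
#   $SPARK_HOME/bin/spark-submit \
#       --class org.apache.spark.examples.SparkPi \
#       --master local[2] \
#       $SPARK_HOME/examples/jars/spark-examples_*.jar 10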