Skip to content
This repository was archived by the owner on Jun 2, 2025. It is now read-only.

Commit aaab8ad

Browse files
authored
Merge pull request #31 from kenny-ezirim/release-sparkml33
upgrading sparkml from v2.4 to v3.3
2 parents df53164 + e9a157b commit aaab8ad

File tree

5 files changed

+317
-67
lines changed

5 files changed

+317
-67
lines changed

Dockerfile

Lines changed: 37 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ ENV DEBIAN_FRONTEND=noninteractive
55
LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
66

77
RUN apt-get update \
8+
&& apt-get -y upgrade \
89
&& apt-get -y install apt-utils \
910
net-tools \
1011
apt-transport-https \
@@ -15,14 +16,13 @@ RUN apt-get update \
1516
maven \
1617
make \
1718
gcc \
19+
# openssl \
1820
zlib1g-dev
1921

20-
RUN apt -y update
21-
22-
ARG OPENSSL_VERSION=1.1.1l
22+
ARG OPENSSL_VERSION=1.1.1q
2323
ARG PYTHON=python3
2424
ARG PIP=pip3
25-
ARG PYTHON_VERSION=3.6.13
25+
ARG PYTHON_VERSION=3.10.7
2626

2727
# Open-SSL
2828
RUN wget -q -c https://www.openssl.org/source/openssl-${OPENSSL_VERSION}.tar.gz \
@@ -34,7 +34,7 @@ RUN wget -q -c https://www.openssl.org/source/openssl-${OPENSSL_VERSION}.tar.gz
3434
&& rmdir /usr/local/ssl/certs \
3535
&& ln -s /etc/ssl/certs /usr/local/ssl/certs
3636

37-
# Install Python-3.6.13 from source
37+
# Install Python-3.10.7 from source
3838
RUN wget -q https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSION.tgz \
3939
&& tar -xzf Python-$PYTHON_VERSION.tgz \
4040
&& cd Python-$PYTHON_VERSION \
@@ -54,8 +54,36 @@ WORKDIR /sagemaker-sparkml-model-server
5454

5555
RUN mvn clean package
5656

57-
RUN cp ./target/sparkml-serving-2.4.jar /usr/local/lib/sparkml-serving-2.4.jar
58-
RUN cp ./serve.sh /usr/local/bin/serve.sh
57+
RUN cp ./target/sparkml-serving-3.3.jar /usr/local/lib/sparkml-serving-3.3.jar \
58+
&& cp ./serve.sh /usr/local/bin/serve.sh \
59+
&& chmod a+x /usr/local/bin/serve.sh
60+
61+
# remove the maven-shared-utils packages - older versions create vulnerabilities
62+
RUN find / -depth -name maven-shared-utils -type d -exec rm -r "{}" \; \
63+
# remove the surefire packages - contains versions of maven-shared-utils that create vulnerabilities
64+
&& find / -depth -name surefire -type d -exec rm -r "{}" \; \
65+
# remove maven-shared-utils jar file with vulnerabilities
66+
# comment this step out if you need to use Maven utilities
67+
&& rm /usr/share/java/maven-shared-utils.jar \
68+
# remove wagon-http-shaded jar file with vulnerabilities associated with org.jsoup:jsoup
69+
&& rm /usr/share/java/wagon-http-shaded-3.3.4.jar \
70+
# remove plexus-utils directory because plexus-utils has vulnerabilities
71+
# comment this step out if you need to use Maven utilities
72+
&& find / -depth -name plexus-utils -type d -exec rm -r "{}" \; \
73+
# remove old version of commons-compress with vulnerability
74+
&& find / -depth -name commons-compress -type d -exec rm -r "{}/1.20" \; \
75+
# remove commons-io v2.5 and v2.6 jar files - both versions have vulnerabilities
76+
&& find / -name commons-io*2.5.jar -type f -exec rm "{}" \; \
77+
&& find / -name commons-io*2.6.jar -type f -exec rm "{}" \; \
78+
# remove jackson-databind
79+
&& find / -name jackson-databind -type d -exec rm -r "{}/2.13.3" \; \
80+
# remove junit-4.12.jar
81+
&& find / -name junit-4.12.jar -type f -exec rm "{}" \; \
82+
# remove maven-compiler-plugin jar from maven repo
83+
&& find / -name maven-compiler-plugin*.jar -type f -exec rm "{}" \; \
84+
# remove guava jar files
85+
&& rm /usr/share/java/guava.jar && rm /root/.m2/repository/com/google/guava/guava/10.0.1/guava-10.0.1.jar \
86+
# remove commons-codec jar
87+
&& find / -name commons-codec-1.11.jar -type f -exec rm "{}" \;
5988

60-
RUN chmod a+x /usr/local/bin/serve.sh
61-
ENTRYPOINT ["/usr/local/bin/serve.sh"]
89+
ENTRYPOINT ["/usr/local/bin/serve.sh"]

README.md

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ SageMaker SparkML Serving Container is primarily built on the underlying Spring
2222
Supported Spark/MLeap version
2323
=============================
2424

25-
Currently SageMaker SparkML Serving is powered by MLeap 0.13.0 and it is tested with Spark major version - 2.3.
25+
Currently SageMaker SparkML Serving is powered by MLeap 0.20.0 and it is tested with Spark major version - 3.3.
2626

2727
Table of Contents
2828
=================
@@ -223,20 +223,20 @@ Calling `CreateModel` is required for creating a `Model` in SageMaker with this
223223
SageMaker works with Docker images stored in [Amazon ECR](https://aws.amazon.com/ecr/). The SageMaker team has prepared and uploaded the Docker images for SageMaker SparkML Serving Container in all regions where SageMaker operates.
224224
Region to ECR container URL mapping can be found below. For a mapping from Region to Region Name, please see [here](https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/Concepts.RegionsAndAvailabilityZones.html).
225225

226-
* us-west-1 = 746614075791.dkr.ecr.us-west-1.amazonaws.com/sagemaker-sparkml-serving:2.4
227-
* us-west-2 = 246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-sparkml-serving:2.4
228-
* us-east-1 = 683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-sparkml-serving:2.4
229-
* us-east-2 = 257758044811.dkr.ecr.us-east-2.amazonaws.com/sagemaker-sparkml-serving:2.4
230-
* ap-northeast-1 = 354813040037.dkr.ecr.ap-northeast-1.amazonaws.com/sagemaker-sparkml-serving:2.4
231-
* ap-northeast-2 = 366743142698.dkr.ecr.ap-northeast-2.amazonaws.com/sagemaker-sparkml-serving:2.4
232-
* ap-southeast-1 = 121021644041.dkr.ecr.ap-southeast-1.amazonaws.com/sagemaker-sparkml-serving:2.4
233-
* ap-southeast-2 = 783357654285.dkr.ecr.ap-southeast-2.amazonaws.com/sagemaker-sparkml-serving:2.4
234-
* ap-south-1 = 720646828776.dkr.ecr.ap-south-1.amazonaws.com/sagemaker-sparkml-serving:2.4
235-
* eu-west-1 = 141502667606.dkr.ecr.eu-west-1.amazonaws.com/sagemaker-sparkml-serving:2.4
236-
* eu-west-2 = 764974769150.dkr.ecr.eu-west-2.amazonaws.com/sagemaker-sparkml-serving:2.4
237-
* eu-central-1 = 492215442770.dkr.ecr.eu-central-1.amazonaws.com/sagemaker-sparkml-serving:2.4
238-
* ca-central-1 = 341280168497.dkr.ecr.ca-central-1.amazonaws.com/sagemaker-sparkml-serving:2.4
239-
* us-gov-west-1 = 414596584902.dkr.ecr.us-gov-west-1.amazonaws.com/sagemaker-sparkml-serving:2.4
226+
* us-west-1 = 746614075791.dkr.ecr.us-west-1.amazonaws.com/sagemaker-sparkml-serving:3.3
227+
* us-west-2 = 246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-sparkml-serving:3.3
228+
* us-east-1 = 683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-sparkml-serving:3.3
229+
* us-east-2 = 257758044811.dkr.ecr.us-east-2.amazonaws.com/sagemaker-sparkml-serving:3.3
230+
* ap-northeast-1 = 354813040037.dkr.ecr.ap-northeast-1.amazonaws.com/sagemaker-sparkml-serving:3.3
231+
* ap-northeast-2 = 366743142698.dkr.ecr.ap-northeast-2.amazonaws.com/sagemaker-sparkml-serving:3.3
232+
* ap-southeast-1 = 121021644041.dkr.ecr.ap-southeast-1.amazonaws.com/sagemaker-sparkml-serving:3.3
233+
* ap-southeast-2 = 783357654285.dkr.ecr.ap-southeast-2.amazonaws.com/sagemaker-sparkml-serving:3.3
234+
* ap-south-1 = 720646828776.dkr.ecr.ap-south-1.amazonaws.com/sagemaker-sparkml-serving:3.3
235+
* eu-west-1 = 141502667606.dkr.ecr.eu-west-1.amazonaws.com/sagemaker-sparkml-serving:3.3
236+
* eu-west-2 = 764974769150.dkr.ecr.eu-west-2.amazonaws.com/sagemaker-sparkml-serving:3.3
237+
* eu-central-1 = 492215442770.dkr.ecr.eu-central-1.amazonaws.com/sagemaker-sparkml-serving:3.3
238+
* ca-central-1 = 341280168497.dkr.ecr.ca-central-1.amazonaws.com/sagemaker-sparkml-serving:3.3
239+
* us-gov-west-1 = 414596584902.dkr.ecr.us-gov-west-1.amazonaws.com/sagemaker-sparkml-serving:3.3
240240

241241
With [SageMaker Python SDK](https://github.com/aws/sagemaker-python-sdk)
242242
------------------------------------------------------------------------
@@ -263,7 +263,7 @@ First you need to ensure that have installed [Docker](https://www.docker.com/) o
263263
In order to build the Docker image, you need to run a single Docker command:
264264

265265
```
266-
docker build -t sagemaker-sparkml-serving:2.4 .
266+
docker build -t sagemaker-sparkml-serving:3.3 .
267267
```
268268

269269
#### Running the image locally
@@ -272,7 +272,7 @@ In order to run the Docker image, you need to run the following command. Please
272272
The command will start the server on port 8080 and will also pass the schema as an environment variable to the Docker container. Alternatively, you can edit the `Dockerfile` to add `ENV SAGEMAKER_SPARKML_SCHEMA=schema` before building the Docker image.
273273

274274
```
275-
docker run -p 8080:8080 -e SAGEMAKER_SPARKML_SCHEMA=schema -v /tmp/model:/opt/ml/model sagemaker-sparkml-serving:2.4 serve
275+
docker run -p 8080:8080 -e SAGEMAKER_SPARKML_SCHEMA=schema -v /tmp/model:/opt/ml/model sagemaker-sparkml-serving:3.3 serve
276276
```
277277

278278
#### Invoking with a payload
@@ -287,7 +287,7 @@ or
287287
curl -i -H "content-type:application/json" -d "{\"data\":[feature_1,\"feature_2\",feature_3]}" http://localhost:8080/invocations
288288
```
289289

290-
The `Dockerfile` can be found at the root directory of the package. SageMaker SparkML Serving Container tags the Docker images using the Spark major version it is compatible with. Right now, it only supports Spark 2.4 and as a result, the Docker image is tagged with 2.4.
290+
The `Dockerfile` can be found at the root directory of the package. SageMaker SparkML Serving Container tags the Docker images using the Spark major version it is compatible with. Right now, it only supports Spark 3.3.0 and as a result, the Docker image is tagged with 3.3.
291291

292292
To avoid rebuilding the Docker image every time you make a code change, you can also install [Maven](http://maven.apache.org/) and run `mvn clean package` at your project root to verify that the code compiles and the unit tests pass.
293293

@@ -310,7 +310,7 @@ aws ecr get-login --region us-west-2 --registry-ids 246618743249 --no-include-em
310310
* Download the Docker image with the following command:
311311

312312
```
313-
docker pull 246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-sparkml-serving:2.4
313+
docker pull 246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-sparkml-serving:3.3
314314
```
315315

316316
For running the Docker image, please see the Running the image locally section from above.

ci/buildspec.yml

Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,41 @@
11
version: 0.2
2-
2+
env:
3+
variables:
4+
IS_GENERIC_IMAGE: "True"
5+
CODEBUILD_RESOLVED_SOURCE_VERSION: "sparkml-v33"
6+
TGT_IMAGE: "515193369038.dkr.ecr.us-west-2.amazonaws.com/sagemaker-sparkml-serving:3.3"
7+
DLC_IMAGES: "515193369038.dkr.ecr.us-west-2.amazonaws.com/sagemaker-sparkml-serving:3.3-pre-scan"
38
phases:
9+
install:
10+
runtime-versions:
11+
python: latest
12+
commands:
13+
- pip3 install pytest
14+
- pip3 install -r $CODEBUILD_SRC_DIR_Source2/src/requirements.txt
15+
- pip3 install -r $CODEBUILD_SRC_DIR_Source2/test/requirements.txt
416
pre_build:
517
commands:
618
- echo Logging in to Amazon ECR...
7-
- $(aws ecr get-login --no-include-email --region $AWS_DEFAULT_REGION)
19+
- aws ecr get-login-password --region $AWS_DEFAULT_REGION | docker login --username AWS --password-stdin 515193369038.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com
820
build:
921
commands:
10-
- echo Build started on `date`
11-
- echo Building the Docker image...
12-
- docker build -t sagemaker-sparkml-serving:2.4 .
13-
- docker tag sagemaker-sparkml-serving:2.4 515193369038.dkr.ecr.us-west-2.amazonaws.com/sagemaker-sparkml-serving:2.4
22+
- echo Build started on `date`
23+
- echo Building the Docker image...
24+
- docker build -t sagemaker-sparkml-serving:3.3 .
25+
- echo Build completed on `date`
1426
post_build:
27+
on-failure: ABORT
1528
commands:
16-
- echo Build completed on `date`
17-
- echo Pushing the Docker image...
18-
- docker push 515193369038.dkr.ecr.us-west-2.amazonaws.com/sagemaker-sparkml-serving:2.4
29+
- echo Tagging pre-scan image...
30+
- docker tag sagemaker-sparkml-serving:3.3 $DLC_IMAGES
31+
- docker push $DLC_IMAGES
32+
- cd $CODEBUILD_SRC_DIR_Source2
33+
- export PYTHONPATH=$(pwd)/src
34+
- cd test/dlc_tests
35+
- echo Running enhanced ecr image scan
36+
- pytest -s sanity/test_ecr_scan.py::test_ecr_enhanced_scan
37+
- echo Tagging image for final push
38+
- docker tag sagemaker-sparkml-serving:3.3 $TGT_IMAGE
39+
- docker push $TGT_IMAGE
40+
- echo $TGT_IMAGE pushed to ECR
41+
- echo Push completed successfully on `date`

0 commit comments

Comments
 (0)