Skip to content

Commit 47b9d06

Browse files
fix: Spark HBase connector on aarch64 (#882)
* fix: Spark HBase connector on aarch64 * fix: Issue number in the changelog * Use the same Hadoop version as in the HBase image * Update spark-k8s/versions.py Co-authored-by: Sebastian Bernauer <sebastian.bernauer@stackable.de> * Update spark-k8s/versions.py Co-authored-by: Sebastian Bernauer <sebastian.bernauer@stackable.de> --------- Co-authored-by: Sebastian Bernauer <sebastian.bernauer@stackable.de>
1 parent 795f7a6 commit 47b9d06

File tree

8 files changed

+131
-16
lines changed

8 files changed

+131
-16
lines changed

CHANGELOG.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ All notable changes to this project will be documented in this file.
2121
- hbase: Add hadoop-azure.jar to the lib directory to support the Azure Blob Filesystem and
2222
the Azure Data Lake Storage ([#853]).
2323
- kafka: Add cyrus-sasl-gssapi package for kerberos ([#874]).
24-
- spark: Add HBase connector ([#878]).
24+
- spark: Add HBase connector ([#878], [#882]).
2525

2626
### Changed
2727

@@ -75,6 +75,7 @@ All notable changes to this project will be documented in this file.
7575
[#878]: https://github.com/stackabletech/docker-images/pull/878
7676
[#879]: https://github.com/stackabletech/docker-images/pull/879
7777
[#881]: https://github.com/stackabletech/docker-images/pull/881
78+
[#882]: https://github.com/stackabletech/docker-images/pull/882
7879

7980
## [24.7.0] - 2024-07-24
8081

hadoop/stackable/patches/3.3.4/001-YARN-11527-3.3.4.patch

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ index 0b2f6f17157d..9dc8b653eb93 100644
77
<json-smart.version>2.4.7</json-smart.version>
88
<nimbus-jose-jwt.version>9.8.1</nimbus-jose-jwt.version>
99
- <nodejs.version>v12.22.1</nodejs.version>
10-
+ <nodejs.version>v14.0.0</nodejs.version>
10+
+ <nodejs.version>v14.17.0</nodejs.version>
1111
<yarnpkg.version>v1.22.5</yarnpkg.version>
1212
<apache-ant.version>1.10.11</apache-ant.version>
1313
</properties>

hadoop/stackable/patches/3.3.6/001-YARN-11527-3.3.6.patch

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ index f1ac43ed5b38..73d0c7580338 100644
77
<woodstox.version>5.4.0</woodstox.version>
88
<nimbus-jose-jwt.version>9.8.1</nimbus-jose-jwt.version>
99
- <nodejs.version>v12.22.1</nodejs.version>
10-
+ <nodejs.version>v14.0.0</nodejs.version>
10+
+ <nodejs.version>v14.17.0</nodejs.version>
1111
<yarnpkg.version>v1.22.5</yarnpkg.version>
1212
<apache-ant.version>1.10.13</apache-ant.version>
1313
<jmh.version>1.20</jmh.version>

hadoop/stackable/patches/3.4.0/001-YARN-11527-3.4.0.patch

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ index 0ed96d087bc..9ebb6af4567 100644
77
<woodstox.version>5.4.0</woodstox.version>
88
<nimbus-jose-jwt.version>9.31</nimbus-jose-jwt.version>
99
- <nodejs.version>v12.22.1</nodejs.version>
10-
+ <nodejs.version>v14.0.0</nodejs.version>
10+
+ <nodejs.version>v14.17.0</nodejs.version>
1111
<yarnpkg.version>v1.22.5</yarnpkg.version>
1212
<apache-ant.version>1.10.13</apache-ant.version>
1313
<jmh.version>1.20</jmh.version>

spark-k8s/Dockerfile

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -53,15 +53,19 @@ ARG HADOOP
5353
ARG HBASE
5454
ARG HBASE_CONNECTOR
5555

56-
WORKDIR /stackable
57-
58-
# Download the hbase-connectors source code
5956
RUN <<EOF
60-
curl https://repo.stackable.tech/repository/packages/hbase-connectors/hbase-connectors_${HBASE_CONNECTOR}.tar.gz \
61-
| tar xz
62-
ln -s hbase-connectors-rel-${HBASE_CONNECTOR} hbase-connectors
57+
microdnf update
58+
59+
# patch: Required for the apply_patches.sh script
60+
microdnf install \
61+
patch
62+
63+
microdnf clean all
64+
rm -rf /var/cache/yum
6365
EOF
6466

67+
WORKDIR /stackable
68+
6569
# Copy the pom.xml file from the patched Spark source code to read the
6670
# versions used by Spark. The pom.xml defines child modules which are
6771
# not required and not copied, therefore mvn must be called with the
@@ -70,6 +74,23 @@ COPY --chown=stackable:stackable --from=spark-source-builder \
7074
/stackable/spark/pom.xml \
7175
spark/
7276

77+
# Download the hbase-connectors source code
78+
RUN <<EOF
79+
curl https://repo.stackable.tech/repository/packages/hbase-connectors/hbase-connectors_${HBASE_CONNECTOR}.tar.gz \
80+
| tar xz
81+
ln -s hbase-connectors-rel-${HBASE_CONNECTOR} hbase-connectors
82+
EOF
83+
84+
# Patch the hbase-connectors source code
85+
WORKDIR /stackable/hbase-connectors
86+
COPY --chown=stackable:stackable \
87+
spark-k8s/stackable/hbase-connectors-patches/apply_patches.sh \
88+
patches/apply_patches.sh
89+
COPY --chown=stackable:stackable \
90+
spark-k8s/stackable/hbase-connectors-patches/${HBASE_CONNECTOR} \
91+
patches/${HBASE_CONNECTOR}
92+
RUN patches/apply_patches.sh ${HBASE_CONNECTOR}
93+
7394
WORKDIR /stackable/hbase-connectors/spark
7495

7596
RUN <<EOF
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
diff --git a/pom.xml b/pom.xml
2+
index e849cd1..f514e14 100644
3+
--- a/pom.xml
4+
+++ b/pom.xml
5+
@@ -157,7 +157,10 @@
6+
<extra.enforcer.version>1.5.1</extra.enforcer.version>
7+
<restrict-imports.enforcer.version>0.14.0</restrict-imports.enforcer.version>
8+
<!--Internally we use a different version of protobuf. See hbase-protocol-shaded-->
9+
- <external.protobuf.version>2.5.0</external.protobuf.version>
10+
+ <!-- com.google repo will be used except on Aarch64 platform. -->
11+
+ <external.protobuf.groupId>com.google.protobuf</external.protobuf.groupId>
12+
+ <external.protobuf.version>2.6.1</external.protobuf.version>
13+
+ <external.protobuf.exe.version>${external.protobuf.version}</external.protobuf.exe.version>
14+
<protobuf.plugin.version>0.5.0</protobuf.plugin.version>
15+
<commons-io.version>2.11.0</commons-io.version>
16+
<avro.version>1.7.7</avro.version>
17+
@@ -933,5 +936,19 @@
18+
</plugins>
19+
</build>
20+
</profile>
21+
+ <!-- use com.github.os72 on aarch64 platform -->
22+
+ <profile>
23+
+ <id>aarch64</id>
24+
+ <properties>
25+
+ <external.protobuf.groupId>com.github.os72</external.protobuf.groupId>
26+
+ <external.protobuf.exe.version>2.6.1-build3</external.protobuf.exe.version>
27+
+ </properties>
28+
+ <activation>
29+
+ <os>
30+
+ <family>linux</family>
31+
+ <arch>aarch64</arch>
32+
+ </os>
33+
+ </activation>
34+
+ </profile>
35+
</profiles>
36+
</project>
37+
diff --git a/spark/pom.xml b/spark/pom.xml
38+
index 3f1eb21..fcdc73e 100644
39+
--- a/spark/pom.xml
40+
+++ b/spark/pom.xml
41+
@@ -84,7 +84,7 @@
42+
<artifactId>protobuf-maven-plugin</artifactId>
43+
<version>${protobuf.plugin.version}</version>
44+
<configuration>
45+
- <protocArtifact>com.google.protobuf:protoc:${external.protobuf.version}:exe:${os.detected.classifier}</protocArtifact>
46+
+ <protocArtifact>${external.protobuf.groupId}:protoc:${external.protobuf.exe.version}:exe:${os.detected.classifier}</protocArtifact>
47+
<protoSourceRoot>${basedir}/src/main/protobuf/</protoSourceRoot>
48+
<clearOutputDirectory>false</clearOutputDirectory>
49+
<checkStaleness>true</checkStaleness>
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
#!/usr/bin/env bash
2+
3+
# Enable error handling and unset variable checking
4+
set -eu
5+
set -o pipefail
6+
7+
# Check if $1 (VERSION) is provided
8+
if [ -z "${1-}" ]; then
9+
echo "Please provide a value for VERSION as the first argument."
10+
exit 1
11+
fi
12+
13+
VERSION="$1"
14+
PATCH_DIR="patches/$VERSION"
15+
16+
# Check if version-specific patches directory exists
17+
if [ ! -d "$PATCH_DIR" ]; then
18+
echo "Patches directory '$PATCH_DIR' does not exist."
19+
exit 1
20+
fi
21+
22+
# Create an array to hold the patches in sorted order
23+
declare -a patch_files=()
24+
25+
echo "Applying patches from ${PATCH_DIR}" now
26+
27+
# Read the patch files into the array
28+
while IFS= read -r -d $'\0' file; do
29+
patch_files+=("$file")
30+
done < <(find "$PATCH_DIR" -name "*.patch" -print0 | sort -zV)
31+
32+
echo "Found ${#patch_files[@]} patches, applying now"
33+
34+
# Iterate through sorted patch files
35+
for patch_file in "${patch_files[@]}"; do
36+
echo "Applying $patch_file"
37+
# We cannot use Git here, as we are not within a Git repository
38+
patch --directory "." --strip=1 < "$patch_file" || {
39+
echo "Failed to apply $patch_file"
40+
exit 1
41+
}
42+
done
43+
44+
echo "All patches applied successfully."

spark-k8s/versions.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@
44
"java-base": "17",
55
"java-devel": "17",
66
"python": "3.11",
7-
"hadoop": "3.3.4", # https://github.com/apache/spark/blob/6a5747d66e53ed0d934cdd9ca5c9bd9fde6868e6/pom.xml#L125
7+
"hadoop": "3.3.6", # Same Hadoop version as in ../hbase/versions.py (shared with the HBase image) to reduce build time and disk requirements
88
"hbase": "2.4.18", # current Stackable LTS version
9-
"aws_java_sdk_bundle": "1.12.262", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.3.4
10-
"azure_storage": "7.0.1", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.3.4
9+
"aws_java_sdk_bundle": "1.12.367", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.3.6
10+
"azure_storage": "7.0.1", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.3.6
1111
"azure_keyvault_core": "1.0.0", # https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1
1212
"jackson_dataformat_xml": "2.15.2", # https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.13/3.5.1
1313
"stax2_api": "4.2.1", # https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2
@@ -22,10 +22,10 @@
2222
"java-base": "17",
2323
"java-devel": "17",
2424
"python": "3.11",
25-
"hadoop": "3.3.4", # https://github.com/apache/spark/blob/6a5747d66e53ed0d934cdd9ca5c9bd9fde6868e6/pom.xml#L125
25+
"hadoop": "3.3.6", # Same Hadoop version as in ../hbase/versions.py (shared with the HBase image) to reduce build time and disk requirements
2626
"hbase": "2.4.18", # current Stackable LTS version
27-
"aws_java_sdk_bundle": "1.12.262", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.3.4
28-
"azure_storage": "7.0.1", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.3.4
27+
"aws_java_sdk_bundle": "1.12.367", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.3.6
28+
"azure_storage": "7.0.1", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.3.6
2929
"azure_keyvault_core": "1.0.0", # https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1
3030
"jackson_dataformat_xml": "2.15.2", # https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.13/3.5.1
3131
"stax2_api": "4.2.1", # https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2

0 commit comments

Comments
 (0)