Skip to content

Commit 98a2699

Browse files
authored
feat: Add Hive 4.0.0 (#818)
* WIP, Hive 4 builds :) * Add pg driver and xml logger * fix wrong metastore tool path * link to upstream PR * changelog
1 parent 1965d50 commit 98a2699

File tree

6 files changed

+74
-6
lines changed

6 files changed

+74
-6
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ All notable changes to this project will be documented in this file.
1010
- vector: Add version `0.40.0` ([#802]).
1111
- airflow: Add version `2.9.3` ([#809]).
1212
- kafka: Add version `3.8.0` ([#813]).
13+
- hive: Add version `4.0.0` ([#818]).
1314

1415
### Removed
1516

@@ -27,6 +28,7 @@ All notable changes to this project will be documented in this file.
2728
[#809]: https://github.com/stackabletech/docker-images/pull/809
2829
[#811]: https://github.com/stackabletech/docker-images/pull/811
2930
[#813]: https://github.com/stackabletech/docker-images/pull/813
31+
[#818]: https://github.com/stackabletech/docker-images/pull/818
3032

3133
## [24.7.0] - 2024-07-24
3234

hadoop/Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ COPY hadoop/stackable/patches /stackable/patches
6363
RUN curl --fail -L "https://repo.stackable.tech/repository/packages/hadoop/hadoop-${PRODUCT}-src.tar.gz" | tar -xzC . && \
6464
patches/apply_patches.sh ${PRODUCT} && \
6565
cd hadoop-${PRODUCT}-src && \
66-
mvn clean package -Pdist,native -pl '!hadoop-tools/hadoop-pipes,!hadoop-yarn-project,!hadoop-mapreduce-project,!hadoop-minicluster' -Drequire.fuse=true -DskipTests -Dmaven.javadoc.skip=true && \
66+
mvn --no-transfer-progress clean package -Pdist,native -pl '!hadoop-tools/hadoop-pipes,!hadoop-yarn-project,!hadoop-mapreduce-project,!hadoop-minicluster' -Drequire.fuse=true -DskipTests -Dmaven.javadoc.skip=true && \
6767
cp -r hadoop-dist/target/hadoop-${PRODUCT} /stackable/hadoop-${PRODUCT} && \
6868
# HDFS fuse-dfs is not part of the regular dist output, so we need to copy it in ourselves
6969
cp hadoop-hdfs-project/hadoop-hdfs-native-client/target/main/native/fuse-dfs/fuse_dfs /stackable/hadoop-${PRODUCT}/bin && \
@@ -100,7 +100,7 @@ WORKDIR /stackable
100100

101101
RUN curl --fail -L "https://github.com/stackabletech/hdfs-utils/archive/refs/tags/v${HDFS_UTILS}.tar.gz" | tar -xzC . && \
102102
cd hdfs-utils-${HDFS_UTILS} && \
103-
mvn clean package -P hadoop-${PRODUCT} -DskipTests -Dmaven.javadoc.skip=true && \
103+
mvn --no-transfer-progress clean package -P hadoop-${PRODUCT} -DskipTests -Dmaven.javadoc.skip=true && \
104104
mkdir -p /stackable/hadoop-${PRODUCT}/share/hadoop/common/lib && \
105105
cp target/hdfs-utils-$HDFS_UTILS.jar /stackable/hadoop-${PRODUCT}/share/hadoop/common/lib/hdfs-utils-${HDFS_UTILS}.jar && \
106106
rm -rf /stackable/hdfs-utils-main

hive/Dockerfile

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,25 @@ curl --fail -L "https://repo.stackable.tech/repository/packages/hive/apache-hive
3030
patches/apply_patches.sh ${PRODUCT}
3131

3232
cd /stackable/apache-hive-${PRODUCT}-src/
33-
mvn --batch-mode --no-transfer-progress clean package -DskipTests --projects standalone-metastore
34-
mv standalone-metastore/target/apache-hive-metastore-${PRODUCT}-bin/apache-hive-metastore-${PRODUCT}-bin /stackable
3533

36-
ln -s /stackable/apache-hive-metastore-${PRODUCT}-bin/ /stackable/hive-metastore
37-
cp /stackable/bin/start-metastore /stackable/hive-metastore/bin
34+
if [[ "${PRODUCT}" == "3.1.3" ]] ; then
35+
mvn --batch-mode --no-transfer-progress clean package -DskipTests --projects standalone-metastore
36+
mv standalone-metastore/target/apache-hive-metastore-${PRODUCT}-bin/apache-hive-metastore-${PRODUCT}-bin /stackable
37+
else
38+
# https://issues.apache.org/jira/browse/HIVE-20451 switched the metastore server packaging starting with 4.0.0
39+
cd standalone-metastore
40+
mvn --batch-mode --no-transfer-progress clean package -DskipTests --projects metastore-server
41+
42+
# We only seem to get a .tar.gz archive, so let's extract that to the correct location
43+
cd /stackable
44+
tar --extract -f /stackable/apache-hive-${PRODUCT}-src/standalone-metastore/metastore-server/target/apache-hive-standalone-metastore-server-${PRODUCT}-bin.tar.gz
45+
46+
# TODO: Remove once the fix https://github.com/apache/hive/pull/5419 is merged and released
47+
# schemaTool.sh still points to the class location used by Hive < 4.0.0; it appears upstream forgot to update the script
48+
sed -i -e 's/CLASS=org.apache.hadoop.hive.metastore.tools.MetastoreSchemaTool/CLASS=org.apache.hadoop.hive.metastore.tools.schematool.MetastoreSchemaTool/' /stackable/apache-hive-metastore-${PRODUCT}-bin/bin/ext/schemaTool.sh
49+
fi
50+
51+
cp /stackable/bin/start-metastore /stackable/apache-hive-metastore-${PRODUCT}-bin/bin
3852
rm -rf /stackable/apache-hive-${PRODUCT}-src
3953

4054
curl --fail -L "https://repo.stackable.tech/repository/packages/jmx-exporter/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" -o "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar"
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
diff --git a/standalone-metastore/metastore-server/pom.xml b/standalone-metastore/metastore-server/pom.xml
2+
index a8f680928c..7102f1b5ca 100644
3+
--- a/standalone-metastore/metastore-server/pom.xml
4+
+++ b/standalone-metastore/metastore-server/pom.xml
5+
@@ -334,7 +334,6 @@
6+
<dependency>
7+
<groupId>org.postgresql</groupId>
8+
<artifactId>postgresql</artifactId>
9+
- <optional>true</optional>
10+
</dependency>
11+
<dependency>
12+
<groupId>org.eclipse.jetty</groupId>
13+
diff --git a/standalone-metastore/pom.xml b/standalone-metastore/pom.xml
14+
index 28ac5ceb65..e3cbd821bd 100644
15+
--- a/standalone-metastore/pom.xml
16+
+++ b/standalone-metastore/pom.xml
17+
@@ -397,7 +397,6 @@
18+
<groupId>org.postgresql</groupId>
19+
<artifactId>postgresql</artifactId>
20+
<version>${postgres.version}</version>
21+
- <scope>runtime</scope>
22+
</dependency>
23+
<dependency>
24+
<groupId>org.apache.httpcomponents</groupId>
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
diff --git a/standalone-metastore/pom.xml b/standalone-metastore/pom.xml
2+
index 28ac5ceb65..03097e7f40 100644
3+
--- a/standalone-metastore/pom.xml
4+
+++ b/standalone-metastore/pom.xml
5+
@@ -494,6 +494,11 @@
6+
<groupId>com.fasterxml.jackson.core</groupId>
7+
<artifactId>jackson-databind</artifactId>
8+
</dependency>
9+
+ <dependency>
10+
+ <!-- Optional log4j dependency to be able to use the XmlLayout -->
11+
+ <groupId>com.fasterxml.jackson.dataformat</groupId>
12+
+ <artifactId>jackson-dataformat-xml</artifactId>
13+
+ </dependency>
14+
</dependencies>
15+
<build>
16+
<pluginManagement>

hive/versions.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,16 @@
11
versions = [
2+
{
3+
"product": "4.0.0",
4+
"jmx_exporter": "1.0.1",
5+
# Hive 4 must be built with Java 8 (according to GitHub README) but seems to run on Java 11
6+
"java-base": "11",
7+
"java-devel": "1.8.0",
8+
"hadoop": "3.3.6",
9+
# Keep consistent with the dependency from Hadoop: https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.3.6
10+
"aws_java_sdk_bundle": "1.12.367",
11+
"azure_storage": "7.0.1",
12+
"azure_keyvault_core": "1.0.0",
13+
},
214
{
315
"product": "3.1.3",
416
"jmx_exporter": "1.0.1",

0 commit comments

Comments
 (0)