[Rust] set default target directory for rust and fix misuse of option_env #1186

Workflow file for this run

# SPDX-FileCopyrightText: 2023 LakeSoul Contributors
#
# SPDX-License-Identifier: Apache-2.0
name: CI with Presto cdc Test
on:
  push:
    paths-ignore:
      - "javadoc/**"
      - "website/**"
      - "cpp/**"
      - "python/**"
      - "**.md"
    branches:
      - "main"
  pull_request:
    paths-ignore:
      - "javadoc/**"
      - "website/**"
      - "cpp/**"
      - "python/**"
      - "**.md"
    branches:
      - "main"
      - "release/**"
  workflow_dispatch:
jobs:
  build:
    runs-on: ubuntu-24.04
    steps:
      - name: Free Disk Space (Ubuntu)
        uses: jlumbroso/free-disk-space@main
        with:
          tool-cache: false
          android: true
          dotnet: true
          haskell: true
          large-packages: false
          docker-images: true
          swap-storage: true
      - uses: actions/checkout@v4
      - name: Set up JDK 11
        uses: actions/setup-java@v4
        with:
          java-version: "11"
          distribution: "temurin"
          cache: maven
      - name: Set up Python 3.9
        uses: actions/setup-python@v4
        with:
          python-version: "3.9"
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip setuptools wheel
          pip install pymysql cryptography jproperties --no-cache-dir
          wget -q https://dlcdn.apache.org/hadoop/common/hadoop-3.3.5/hadoop-3.3.5.tar.gz -O $HOME/hadoop-3.3.5.tar.gz && tar xf $HOME/hadoop-3.3.5.tar.gz -C $HOME
          echo "HADOOP_HOME=$HOME/hadoop-3.3.5" >> $GITHUB_ENV
          wget -q https://repo1.maven.org/maven2/org/apache/flink/flink-s3-fs-hadoop/1.20.1/flink-s3-fs-hadoop-1.20.1.jar -O $HOME/flink-s3-fs-hadoop-1.20.1.jar
          wget -q https://repo1.maven.org/maven2/org/apache/parquet/parquet-hadoop-bundle/1.12.3/parquet-hadoop-bundle-1.12.3.jar -O $HOME/parquet-hadoop-bundle-1.12.3.jar
          wget -q https://repo1.maven.org/maven2/org/apache/flink/flink-parquet/1.20.1/flink-parquet-1.20.1.jar -O $HOME/flink-parquet-1.20.1.jar
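      # Lines appended to $GITHUB_ENV (like HADOOP_HOME above) become environment
      # variables for every subsequent step of this job. The general pattern, with
      # an illustrative variable name:
      #   echo "MY_TOOL_HOME=$HOME/my-tool" >> $GITHUB_ENV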
      - name: Install Protoc
        uses: arduino/setup-protoc@v2
        with:
          version: "23.x"
          repo-token: ${{ secrets.GITHUB_TOKEN }}
      - uses: actions-rs/toolchain@v1
        with:
          profile: minimal
          toolchain: stable
          default: true
      - uses: Swatinem/rust-cache@v2
        with:
          workspaces: "./ -> target"
      - name: Cache Docker images
        uses: ScribeMD/docker-cache@0.4.0
        with:
          key: docker-${{ runner.os }}-${{ hashFiles('Cross.toml', 'docker/lakesoul-docker-compose-env/docker-compose.yml') }}
      - name: Pull images
        run: |
          docker pull -q swr.cn-southwest-2.myhuaweicloud.com/dmetasoul-repo/bitnami/spark:3.3.1
      - uses: actions-rs/cargo@v1
        with:
          use-cross: true
          command: build
          args: "--target x86_64-unknown-linux-gnu --package lakesoul-io-c --package lakesoul-metadata-c --release --all-features"
      - name: Build with Maven
        run: |
          mkdir -p target/release
          cp rust/target/x86_64-unknown-linux-gnu/release/liblakesoul_io_c.so target/release
          cp rust/target/x86_64-unknown-linux-gnu/release/liblakesoul_metadata_c.so target/release
          MAVEN_OPTS="-Xmx4000m" mvn -q -B package -f pom.xml -Pcross-build -DskipTests
      - name: Get jar names
        run: |
          echo "FLINK_JAR_NAME=$(python script/get_jar_name.py lakesoul-flink)" >> $GITHUB_ENV
          echo "FLINK_TEST_JAR_NAME=$(python script/get_jar_name.py lakesoul-flink | sed -e 's/.jar/-tests.jar/g')" >> $GITHUB_ENV
          echo "SPARK_JAR_NAME=$(python script/get_jar_name.py lakesoul-spark)" >> $GITHUB_ENV
          echo "SPARK_TEST_JAR_NAME=$(python script/get_jar_name.py lakesoul-spark | sed -e 's/.jar/-tests.jar/g')" >> $GITHUB_ENV
          echo "PRESTO_JAR_NAME=$(python script/get_jar_name.py lakesoul-presto)" >> $GITHUB_ENV
          echo "PRESTO_TEST_JAR_NAME=$(python script/get_jar_name.py lakesoul-presto | sed -e 's/.jar/-tests.jar/g')" >> $GITHUB_ENV
      - name: Copy built jar to work-dir
        run: |
          cp ./lakesoul-flink/target/$FLINK_JAR_NAME ./script/benchmark/work-dir
          cp ./lakesoul-flink/target/$FLINK_TEST_JAR_NAME ./script/benchmark/work-dir
          cp ./lakesoul-spark/target/$SPARK_JAR_NAME ./script/benchmark/work-dir
          cp ./lakesoul-spark/target/$SPARK_TEST_JAR_NAME ./script/benchmark/work-dir
          cp ./lakesoul-presto/target/$PRESTO_JAR_NAME ./script/benchmark/work-dir
          cp ./lakesoul-presto/target/$PRESTO_TEST_JAR_NAME ./script/benchmark/work-dir
      - name: Deploy cluster
        run: |
          sed -i 's/if NEW.version - rs_version >= 10 then/if NEW.version - rs_version >= 1 then/' script/meta_init.sql
          sed -i 's/if NEW.version >= 10 then/if NEW.version >= 1 then/' script/meta_init.sql
          cd ./docker/lakesoul-docker-compose-env
          docker compose pull -q
          docker compose --profile s3 up -d
          sleep 30s
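      # The sed edits lower the trigger thresholds in meta_init.sql from 10 new
      # versions to 1, presumably so that compaction fires within this short CI
      # run instead of waiting for 10 commits. To verify the rewrite applied:
      #   grep -n '>= 1 then' script/meta_init.sql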
      - name: Deploy Presto Server
        run: |
          echo "deploy presto server pwd: ${PWD}"
          docker run -d --net lakesoul-docker-compose-env_default --name=presto \
            -v ${PWD}/script/benchmark/work-dir/${PRESTO_JAR_NAME}:/opt/presto-server/plugin/lakesoul/${PRESTO_JAR_NAME} \
            -v ${PWD}/script/benchmark/presto/catalog:/opt/presto-server/etc/catalog \
            -v ${PWD}/script/benchmark/presto/config.properties:/opt/presto-server/etc/config.properties \
            -v ${PWD}/script/benchmark/presto/jvm.config:/opt/presto-server/etc/jvm.config \
            -v ${PWD}/script/benchmark/work-dir:/opt/spark/work-dir \
            -v ${PWD}/script/benchmark/presto/lakesoul.properties:/root/lakesoul.properties \
            --env lakesoul_home=/root/lakesoul.properties prestodb/presto:0.292
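      # Beyond the `docker ps` liveness probe later in the job, a stricter readiness
      # check could poll Presto's /v1/info REST endpoint from inside the container's
      # network namespace (sketch; assumes the default HTTP port 8080 from the
      # mounted config.properties):
      #   docker run --rm --net container:presto curlimages/curl -sf http://localhost:8080/v1/info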
      - name: Start compaction task
        run: |
          cd ./script/benchmark/work-dir
          nohup docker run --cpus 2 -m 5000m --net lakesoul-docker-compose-env_default --rm -t \
            -v ${PWD}:/opt/spark/work-dir \
            --env lakesoul_home=/opt/spark/work-dir/lakesoul.properties \
            swr.cn-southwest-2.myhuaweicloud.com/dmetasoul-repo/bitnami/spark:3.3.1 \
            spark-submit --driver-memory 2G --executor-memory 2G \
            --conf spark.driver.memoryOverhead=1500m \
            --conf spark.executor.memoryOverhead=1500m \
            --conf spark.hadoop.fs.s3.buffer.dir=/tmp \
            --conf spark.hadoop.fs.s3a.buffer.dir=/tmp \
            --conf spark.hadoop.fs.s3.impl=org.apache.hadoop.fs.s3a.S3AFileSystem \
            --conf spark.hadoop.fs.s3a.path.style.access=true \
            --conf spark.hadoop.fs.s3a.endpoint=http://minio:9000 \
            --conf spark.hadoop.fs.s3a.access.key=minioadmin1 \
            --conf spark.hadoop.fs.s3a.secret.key=minioadmin1 \
            --conf spark.sql.warehouse.dir=s3://lakesoul-test-bucket/ \
            --conf spark.hadoop.fs.s3a.fast.upload.buffer=disk \
            --conf spark.hadoop.fs.s3a.fast.upload=true \
            --conf spark.dmetasoul.lakesoul.native.io.enable=true \
            --conf spark.dmetasoul.lakesoul.compaction.level1.file.number.limit=5 \
            --conf spark.dmetasoul.lakesoul.compaction.level1.file.merge.num.limit=2 \
            --class com.dmetasoul.lakesoul.spark.compaction.NewCompactionTask \
            --master local[4] /opt/spark/work-dir/$SPARK_JAR_NAME \
            --threadpool.size=10 --database="" --file_num_limit=5 --file_size_limit=10KB \
            > compaction.log 2>&1 &
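      # nohup plus the trailing & keep the compaction container running in the
      # background for the rest of the job; its output is captured in
      # work-dir/compaction.log and uploaded as an artifact at the end. To follow it live:
      #   tail -f script/benchmark/work-dir/compaction.log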
      - name: Start flink mysql cdc task-1
        run: |
          docker exec -t lakesoul-docker-compose-env-jobmanager-1 flink run -d \
            -c org.apache.flink.lakesoul.entry.MysqlCdc /opt/flink/work-dir/$FLINK_JAR_NAME \
            --source_db.host mysql --source_db.port 3306 --source_db.db_name test_cdc \
            --source_db.user root --source_db.password root \
            --source.parallelism 2 --sink.parallelism 4 --use.cdc true \
            --warehouse_path s3://lakesoul-test-bucket/data/ \
            --flink.checkpoint s3://lakesoul-test-bucket/chk \
            --flink.savepoint s3://lakesoul-test-bucket/svp \
            --job.checkpoint_interval 5000 --server_time_zone UTC
          sleep 30s
      - name: Start flink source to sink task-2
        run: |
          docker exec -t lakesoul-docker-compose-env-jobmanager-1 flink run -d \
            -c org.apache.flink.lakesoul.test.benchmark.LakeSoulSourceToSinkTable \
            -C file:///opt/flink/work-dir/$FLINK_JAR_NAME /opt/flink/work-dir/$FLINK_TEST_JAR_NAME \
            --source.database.name test_cdc --source.table.name default_init \
            --sink.database.name flink_sink --sink.table.name default_init \
            --use.cdc true --hash.bucket.number 2 --job.checkpoint_interval 10000 \
            --server_time_zone UTC \
            --warehouse.path s3://lakesoul-test-bucket/flink-sink/data \
            --flink.checkpoint s3://lakesoul-test-bucket/flink-sink/chk
          sleep 30s
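      # `flink run -C <url>` adds the given jar to the user classpath of the job
      # (on both client and cluster), which is how the main connector jar is made
      # visible to the test job submitted from the tests jar. Generic shape:
      #   flink run -d -c <MainClass> -C file:///path/to/dependency.jar /path/to/job.jar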
      - name: Download mysql driver jar
        run: |
          cd ./script/benchmark/work-dir
          if [ ! -e mysql-connector-java-8.0.30.jar ]; then wget -q https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.30/mysql-connector-java-8.0.30.jar; fi
          if [ ! -e presto-jdbc-0.292.jar ]; then wget -q https://repo1.maven.org/maven2/com/facebook/presto/presto-jdbc/0.292/presto-jdbc-0.292.jar; fi
      - name: Create table and insert data
        run: |
          cd ./script/benchmark
          python 1_create_table.py
          docker exec -i lakesoul-docker-compose-env-mysql-1 bash /2_insert_table_data.sh
          sleep 30s
      - name: Presto Server Liveness Probe
        run: |
          docker ps | grep presto
      - name: "[Check] Mysql cdc data accuracy verification task"
        run: |
          cd ./script/benchmark
          docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 \
            java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME:/root/mysql-connector-java-8.0.30.jar:/root/presto-jdbc-0.292.jar \
            com.facebook.presto.benchmark.Benchmark
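      # --net container:presto attaches the benchmark container to the Presto
      # container's network namespace, so the JDBC-based checks can reach the
      # coordinator via localhost without publishing any ports on the runner.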
- name: "[Check] Presto source to sink data accuracy verification task"
run: |
cd ./script/benchmark
docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME:/root/mysql-connector-java-8.0.30.jar:/root/presto-jdbc-0.292.jar com.facebook.presto.benchmark.Benchmark --cdc.contract false --single.table.contract true
- name: Adding columns for tables and deleting some data from tables
run: |
cd ./script/benchmark
python3 3_add_column.py
python3 delete_data.py
docker exec -i lakesoul-docker-compose-env-mysql-1 bash /2_insert_table_data.sh
sleep 60s
- name: "[Check] Mysql cdc data accuracy verification task"
run: |
cd ./script/benchmark
docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME:/root/mysql-connector-java-8.0.30.jar:/root/presto-jdbc-0.292.jar com.facebook.presto.benchmark.Benchmark
- name: "[Check] Presto source to sink data accuracy verification task"
run: |
cd ./script/benchmark
docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME:/root/mysql-connector-java-8.0.30.jar:/root/presto-jdbc-0.292.jar com.facebook.presto.benchmark.Benchmark --cdc.contract false --single.table.contract true
- name: Updating data in tables
run: |
cd ./script/benchmark
python3 4_update_data.py
sleep 60s
- name: "[Check] Mysql cdc data accuracy verification task"
run: |
cd ./script/benchmark
docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME:/root/mysql-connector-java-8.0.30.jar:/root/presto-jdbc-0.292.jar com.facebook.presto.benchmark.Benchmark
- name: "[Check] Presto source to sink data accuracy verification task"
run: |
cd ./script/benchmark
docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME:/root/mysql-connector-java-8.0.30.jar:/root/presto-jdbc-0.292.jar com.facebook.presto.benchmark.Benchmark --cdc.contract false --single.table.contract true
- name: Dropping columns and deleting some data in tables
run: |
cd ./script/benchmark
python3 6_drop_column.py
python3 delete_data.py
docker exec -i lakesoul-docker-compose-env-mysql-1 bash /2_insert_table_data.sh
sleep 60s
- name: "[Check] Mysql cdc data accuracy verification task"
run: |
cd ./script/benchmark
docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME:/root/mysql-connector-java-8.0.30.jar:/root/presto-jdbc-0.292.jar com.facebook.presto.benchmark.Benchmark
- name: "[Check] Presto source to sink data accuracy verification task"
run: |
cd ./script/benchmark
docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME:/root/mysql-connector-java-8.0.30.jar:/root/presto-jdbc-0.292.jar com.facebook.presto.benchmark.Benchmark --cdc.contract false --single.table.contract true
      - name: Print Flink Log
        if: always()
        run: |
          docker logs lakesoul-docker-compose-env-jobmanager-1 > flink-job-manager.log
          docker logs lakesoul-docker-compose-env-taskmanager-1 > flink-task-manager-1.log
          docker logs lakesoul-docker-compose-env-taskmanager-2 > flink-task-manager-2.log
          docker logs presto > presto-server.log 2>&1
      - name: Upload Log
        if: always()
        continue-on-error: true
        uses: actions/upload-artifact@v4
        with:
          name: flink-presto-cluster-log
          path: |
            flink-*.log
            presto-server.log
          retention-days: 5
          if-no-files-found: error
      - name: Upload Compaction Log
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: lakesoul-compaction-log
          path: ./script/benchmark/work-dir/compaction.log
          retention-days: 3
          if-no-files-found: error