Skip to content

Commit 5190213

Browse files
committed
Merge branch 'main' of https://github.com/datafuselabs/databend into add_runtime_bloom_filter_for_merge_into
2 parents 7bc84b8 + 4a55ea4 commit 5190213

File tree

97 files changed

+1605
-940
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

97 files changed

+1605
-940
lines changed

.github/actions/benchmark_cloud/action.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@ runs:
4949
BENDSQL_DSN: "databend://${{ inputs.cloud_user }}:${{ inputs.cloud_password }}@${{ inputs.cloud_gateway }}:443"
5050
run: |
5151
if [[ "${{ inputs.dataset }}" == "load" ]]; then
52-
echo "CREATE DATABASE IF NOT EXISTS load_test_${{ inputs.run_id }};" | bendsql
5352
echo "database=load_test_${{ inputs.run_id }}" >> $GITHUB_OUTPUT
5453
echo "tries=1" >> $GITHUB_OUTPUT
5554
else

.github/workflows/cloud.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ env:
1818

1919
jobs:
2020
info:
21-
if: contains(github.event.pull_request.labels.*.name, 'ci-cloud') || contains(github.event.pull_request.labels.*.name, 'ci-benchmark') || contains(github.event.pull_request.labels.*.name, 'ci-benchmark-cloud') || contains(github.event.pull_request.labels.*.name, 'ci-benchmark-local')
21+
if: contains(github.event.pull_request.labels.*.name, 'ci-cloud') || contains(github.event.pull_request.labels.*.name, 'ci-benchmark') || contains(github.event.pull_request.labels.*.name, 'ci-benchmark-cloud') || contains(github.event.pull_request.labels.*.name, 'ci-benchmark-load') || contains(github.event.pull_request.labels.*.name, 'ci-benchmark-local')
2222
runs-on: ubuntu-latest
2323
outputs:
2424
sha: ${{ steps.sha.outputs.sha }}
@@ -37,6 +37,8 @@ jobs:
3737
echo "target=all" >> $GITHUB_OUTPUT
3838
elif ${{ contains(github.event.pull_request.labels.*.name, 'ci-benchmark-cloud') }}; then
3939
echo "target=cloud" >> $GITHUB_OUTPUT
40+
elif ${{ contains(github.event.pull_request.labels.*.name, 'ci-benchmark-load') }}; then
41+
echo "target=load" >> $GITHUB_OUTPUT
4042
elif ${{ contains(github.event.pull_request.labels.*.name, 'ci-benchmark-local') }}; then
4143
echo "target=local" >> $GITHUB_OUTPUT
4244
fi

.github/workflows/reuse.benchmark.yml

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,47 @@ jobs:
129129
cloud_user: ${{ secrets.BENCHMARK_CLOUD_USER }}
130130
cloud_password: ${{ secrets.BENCHMARK_CLOUD_PASSWORD }}
131131
cloud_gateway: ${{ secrets.BENCHMARK_CLOUD_GATEWAY }}
132+
- name: clean
133+
if: always()
134+
continue-on-error: true
135+
env:
136+
BENDSQL_DSN: "databend://${{ secrets.BENCHMARK_CLOUD_USER }}:${{ secrets.BENCHMARK_CLOUD_PASSWORD }}@${{ secrets.BENCHMARK_CLOUD_GATEWAY }}:443/?warehouse=default"
137+
run: |
138+
echo "DROP WAREHOUSE IF EXISTS 'benchmark-${{ inputs.run_id }}';" | bendsql
139+
140+
load:
141+
runs-on: ubuntu-latest
142+
steps:
143+
- uses: actions/checkout@v4
144+
if: inputs.source == 'release'
145+
- uses: actions/checkout@v4
146+
if: inputs.source == 'pr'
147+
with:
148+
ref: "refs/pull/${{ inputs.source_id }}/merge"
149+
- uses: ./.github/actions/setup_bendsql
150+
- uses: ./.github/actions/benchmark_cloud
151+
if: inputs.target == 'load' || inputs.target == 'all'
152+
timeout-minutes: 120
153+
id: benchmark
154+
with:
155+
sha: ${{ inputs.sha }}
156+
run_id: ${{ inputs.run_id }}
157+
dataset: load
158+
source: ${{ inputs.source }}
159+
source_id: ${{ inputs.source_id }}
160+
size: Small
161+
version: ${{ inputs.version }}
162+
cloud_user: ${{ secrets.BENCHMARK_CLOUD_USER }}
163+
cloud_password: ${{ secrets.BENCHMARK_CLOUD_PASSWORD }}
164+
cloud_gateway: ${{ secrets.BENCHMARK_CLOUD_GATEWAY }}
165+
- name: clean
166+
if: always()
167+
continue-on-error: true
168+
env:
169+
BENDSQL_DSN: "databend://${{ secrets.BENCHMARK_CLOUD_USER }}:${{ secrets.BENCHMARK_CLOUD_PASSWORD }}@${{ secrets.BENCHMARK_CLOUD_GATEWAY }}:443/?warehouse=default"
170+
run: |
171+
echo "DROP DATABASE IF EXISTS 'load_test_${{ inputs.run_id }}';" | bendsql
172+
echo "DROP WAREHOUSE IF EXISTS 'benchmark-${{ inputs.run_id }}';" | bendsql
132173
133174
comment:
134175
needs: [local, cloud]
@@ -187,6 +228,8 @@ jobs:
187228
dataset:
188229
- "tpch"
189230
- "hits"
231+
- "load"
232+
- "internal"
190233
steps:
191234
- uses: actions/checkout@v4
192235
- name: Install Dependencies
@@ -196,7 +239,7 @@ jobs:
196239
- uses: actions/download-artifact@v4
197240
with:
198241
path: benchmark/clickbench/results
199-
pattern: benchmark-*
242+
pattern: benchmark-${{ matrix.dataset }}-*
200243
merge-multiple: true
201244
- name: Generate report and upload to R2
202245
working-directory: benchmark/clickbench

Cargo.lock

Lines changed: 0 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

benchmark/clickbench/benchmark_cloud.sh

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ esac
5252
echo "#######################################################"
5353
echo "Running benchmark for Databend Cloud with S3 storage..."
5454

55-
export BENDSQL_DSN="databend://${CLOUD_USER}:${CLOUD_PASSWORD}@${CLOUD_GATEWAY}:443/${BENCHMARK_DATABASE}?warehouse=${CLOUD_WAREHOUSE}"
55+
export BENDSQL_DSN="databend://${CLOUD_USER}:${CLOUD_PASSWORD}@${CLOUD_GATEWAY}:443"
5656

5757
echo "Creating warehouse..."
5858
echo "DROP WAREHOUSE IF EXISTS '${CLOUD_WAREHOUSE}';" | bendsql
@@ -71,6 +71,14 @@ until bendsql --query="SHOW WAREHOUSES LIKE '${CLOUD_WAREHOUSE}'" | grep -q "Run
7171
sleep 10
7272
done
7373

74+
export BENDSQL_DSN="databend://${CLOUD_USER}:${CLOUD_PASSWORD}@${CLOUD_GATEWAY}:443/${BENCHMARK_DATABASE}?warehouse=${CLOUD_WAREHOUSE}"
75+
76+
if [[ "${BENCHMARK_DATASET}" == "load" ]]; then
77+
echo "Creating database..."
78+
echo "DROP DATABASE IF EXISTS ${BENCHMARK_DATABASE};" | bendsql --database default
79+
echo "CREATE DATABASE ${BENCHMARK_DATABASE};" | bendsql --database default
80+
fi
81+
7482
echo "Checking session settings..."
7583
bendsql --query="select * from system.settings where value != default;" -o table
7684

@@ -99,7 +107,9 @@ function run_query() {
99107

100108
QUERY_NUM=0
101109
for query in "${BENCHMARK_DATASET}"/queries/*.sql; do
102-
echo "Running Q${QUERY_NUM}: ${query}"
110+
echo
111+
echo "==> Running Q${QUERY_NUM}: ${query}"
112+
cat "$query"
103113
yq -i ".result += [[]]" -o json result.json
104114
for i in $(seq 1 "$BENCHMARK_TRIES"); do
105115
run_query "$QUERY_NUM" "$i" "$query"

benchmark/clickbench/benchmark_local.sh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,9 @@ function run_query() {
110110
TRIES=3
111111
QUERY_NUM=0
112112
for query in "${BENCHMARK_DATASET}"/queries/*.sql; do
113-
echo "Running Q${QUERY_NUM}: ${query}"
113+
echo
114+
echo "==> Running Q${QUERY_NUM}: ${query}"
115+
cat "$query"
114116
sync
115117
echo 3 | sudo tee /proc/sys/vm/drop_caches
116118
yq -i ".result += [[]]" -o json result.json

benchmark/clickbench/update_results.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,17 +16,14 @@
1616

1717
def update_results(dataset, title, url):
1818
queries = []
19-
for query_file in glob.glob(f"{dataset}/queries/*.sql"):
19+
for query_file in sorted(glob.glob(f"{dataset}/queries/*.sql")):
2020
with open(query_file, "r") as f:
2121
queries.append(f.read())
2222
results = []
2323
for result_file in glob.glob(f"results/{dataset}/**/*.json", recursive=True):
2424
logger.info(f"reading result: {result_file}...")
2525
with open(result_file, "r") as f:
26-
result = json.load(f)
27-
# if dataset == "tpch":
28-
# result["result"].insert(0, [0.01, 0.01, 0.01])
29-
results.append(result)
26+
results.append(json.load(f))
3027

3128
logger.info("loading report template %s ...", TEMPLATE_FILE)
3229
templateLoader = FileSystemLoader(searchpath="./")

scripts/benchmark/query/load/tpch10.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ select version();
77
SQL
88

99
for t in customer lineitem nation orders partsupp part region supplier; do
10-
echo "DROP TABLE IF EXISTS $t;" | bendsql
10+
echo "DROP TABLE IF EXISTS $t;" | bendsql
1111
done
1212

1313
cat <<SQL | bendsql
@@ -113,8 +113,8 @@ cat <<SQL | bendsql
113113
SQL
114114

115115
for t in customer lineitem nation orders partsupp part region supplier; do
116-
echo "loading into $t ..."
117-
cat <<SQL | bendsql
116+
echo "loading into $t ..."
117+
cat <<SQL | bendsql
118118
COPY INTO $t FROM 's3://repo.databend.rs/datasets/tpch10/${t}/' connection=(connection_name='repo') pattern ='${t}.*'
119119
file_format=(type='CSV' field_delimiter='|' record_delimiter='\\n' skip_header=0);
120120
ANALYZE TABLE "${t}";

scripts/benchmark/query/load/tpch100.sh

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ select version();
77
SQL
88

99
for t in customer lineitem nation orders partsupp part region supplier; do
10-
echo "DROP TABLE IF EXISTS $t;" | bendsql
10+
echo "DROP TABLE IF EXISTS $t;" | bendsql
1111
done
1212

1313
cat <<SQL | bendsql
@@ -113,8 +113,8 @@ cat <<SQL | bendsql
113113
SQL
114114

115115
for t in nation region; do
116-
echo "loading into $t ..."
117-
cat <<SQL | bendsql
116+
echo "loading into $t ..."
117+
cat <<SQL | bendsql
118118
COPY INTO $t FROM 's3://repo.databend.rs/tpch100/${t}.tbl'
119119
credentials=(access_key_id ='$REPO_ACCESS_KEY_ID' secret_access_key ='$REPO_SECRET_ACCESS_KEY')
120120
file_format=(type='CSV' field_delimiter='|' record_delimiter='\\n' skip_header=1);
@@ -124,8 +124,8 @@ SQL
124124
done
125125

126126
for t in customer lineitem orders partsupp part supplier; do
127-
echo "loading into $t ..."
128-
cat <<SQL | bendsql
127+
echo "loading into $t ..."
128+
cat <<SQL | bendsql
129129
COPY INTO $t FROM 's3://repo.databend.rs/tpch100/${t}/' connection=(connection_name='repo') pattern ='${t}.tbl.*'
130130
file_format=(type='CSV' field_delimiter='|' record_delimiter='\\n' skip_header=1);
131131
ANALYZE TABLE "${t}";

src/meta/api/src/background_api_impl.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ use databend_common_meta_app::background::UpdateBackgroundJobReply;
3838
use databend_common_meta_app::background::UpdateBackgroundJobStatusReq;
3939
use databend_common_meta_app::background::UpdateBackgroundTaskReply;
4040
use databend_common_meta_app::background::UpdateBackgroundTaskReq;
41+
use databend_common_meta_app::id_generator::IdGenerator;
4142
use databend_common_meta_kvapi::kvapi;
4243
use databend_common_meta_kvapi::kvapi::Key;
4344
use databend_common_meta_kvapi::kvapi::UpsertKVReq;
@@ -57,7 +58,6 @@ use crate::deserialize_struct;
5758
use crate::fetch_id;
5859
use crate::get_pb_value;
5960
use crate::get_u64_value;
60-
use crate::id_generator::IdGenerator;
6161
use crate::kv_app_error::KVAppError;
6262
use crate::send_txn;
6363
use crate::serialize_struct;

0 commit comments

Comments
(0)