Skip to content

Commit dcfbca3

Browse files
committed
edit script
1 parent 5c27cf5 commit dcfbca3

File tree

1 file changed

+45
-112
lines changed

1 file changed

+45
-112
lines changed

.github/workflows/pr_benchmarks.yml

Lines changed: 45 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@ on:
2626
pr_head_sha:
2727
description: 'PR Head SHA'
2828
required: true
29+
pr_branch:
30+
description: 'PR Branch'
31+
required: true
2932
base_branch:
3033
description: 'Base branch to compare against (usually main)'
3134
required: true
@@ -79,123 +82,53 @@ jobs:
7982

8083
- name: Generate benchmark data
8184
run: |
82-
# Run data generation for each benchmark
83-
cd pr_branch/benchmarks
84-
85-
# Parse benchmarks from input
86-
IFS=' ' read -r -a BENCHMARKS <<< "${{ github.event.inputs.benchmarks }}"
85+
### Command used to pre-build (precompile) the benchmark binaries in each checkout
86+
export CARGO_COMMAND="cargo run --release"
8787
88-
# Generate data for each benchmark
89-
for benchmark in "${BENCHMARKS[@]}"; do
90-
echo "Generating data for $benchmark..."
91-
./bench.sh data "$benchmark"
92-
done
93-
94-
- name: Run PR branch benchmarks
95-
id: pr_benchmarks
96-
run: |
97-
# Navigate to PR branch
88+
# start compiling the branch (in the background)
9889
cd pr_branch/benchmarks
99-
100-
# Parse benchmarks from input
101-
IFS=' ' read -r -a BENCHMARKS <<< "${{ github.event.inputs.benchmarks }}"
102-
103-
# Use the branch name as results name
104-
BRANCH_NAME=$(git rev-parse --abbrev-ref HEAD)
105-
BRANCH_NAME=${BRANCH_NAME//\//_}
106-
107-
# Run each benchmark
108-
for benchmark in "${BENCHMARKS[@]}"; do
109-
echo "Running $benchmark on PR branch..."
110-
RESULTS_NAME="$BRANCH_NAME" ./bench.sh run "$benchmark"
111-
done
112-
113-
echo "pr_results_dir=pr_branch/benchmarks/results/$BRANCH_NAME" >> $GITHUB_OUTPUT
114-
115-
- name: Run base branch benchmarks
116-
id: base_benchmarks
117-
run: |
118-
# Navigate to base branch
90+
export BRANCH_NAME=`git rev-parse --abbrev-ref HEAD`
91+
${CARGO_COMMAND} --bin tpch >> build.log 2>&1 &
92+
${CARGO_COMMAND} --bin parquet >> build.log 2>&1 &
93+
${CARGO_COMMAND} --bin dfbench >> build.log 2>&1 &
94+
popd
11995
cd base_branch/benchmarks
120-
121-
# Parse benchmarks from input
122-
IFS=' ' read -r -a BENCHMARKS <<< "${{ github.event.inputs.benchmarks }}"
123-
124-
# Use 'base_branch' as results name
125-
BRANCH_NAME="base_${BRANCH_NAME:-main}"
126-
BRANCH_NAME=${BRANCH_NAME//\//_}
127-
128-
# Run each benchmark
96+
${CARGO_COMMAND} --bin tpch >> build.log 2>&1 &
97+
${CARGO_COMMAND} --bin parquet >> build.log 2>&1 &
98+
${CARGO_COMMAND} --bin dfbench >> build.log 2>&1 &
99+
popd
100+
101+
# Wait for the compilation to finish
102+
wait
103+
# Check if the compilation was successful
104+
if grep -q "error" build.log; then
105+
echo "Compilation failed. Check build.log for details."
106+
exit 1
107+
fi
108+
echo "Compilation completed successfully."
109+
110+
# Set up the benchmarks in the base branch
111+
cd base_branch/benchmarks
112+
# For each benchmark: create its data if needed, then run it against the base and PR checkouts
129113
for benchmark in "${BENCHMARKS[@]}"; do
130-
echo "Running $benchmark on base branch..."
131-
RESULTS_NAME="$BRANCH_NAME" ./bench.sh run "$benchmark"
114+
echo "** Creating data if needed **"
115+
./bench.sh data $bench
116+
echo "** Running $bench baseline (merge-base from main)... **"
117+
export DATAFUSION_DIR=${GITHUB_WORKSPACE}/base_branch
118+
./bench.sh run $bench
119+
## Run against branch
120+
echo "** Running $bench branch... **"
121+
export DATAFUSION_DIR=${GITHUB_WORKSPACE}/pr_branch
122+
./bench.sh run $bench
132123
done
133-
134-
echo "base_results_dir=base_branch/benchmarks/results/$BRANCH_NAME" >> $GITHUB_OUTPUT
135124
136-
- name: Install comparison requirements
137-
run: |
138-
# Setup virtual environment with requirements
139-
cd pr_branch/benchmarks
125+
## Compare
126+
rm -f /tmp/report.txt
127+
export BENCH_BRANCH_NAME=${{ github.event.inputs.pr_branch }} # NOTE: the input is used as-is; no "/" -> "_" replacement happens on this line
128+
# Install requirements for comparison
140129
pip install -r requirements.txt
141-
142-
- name: Compare benchmark results
143-
id: compare
144-
run: |
145-
# Navigate to PR branch benchmark directory
146-
cd pr_branch/benchmarks
147-
148-
# Parse benchmarks from input
149-
IFS=' ' read -r -a BENCHMARKS <<< "${{ github.event.inputs.benchmarks }}"
150-
151-
# Initialize results variable
152-
COMPARISON_RESULTS=""
153-
154-
# Get the directory names
155-
PR_RESULTS_DIR="${{ steps.pr_benchmarks.outputs.pr_results_dir }}"
156-
BASE_RESULTS_DIR="${{ steps.base_benchmarks.outputs.base_results_dir }}"
157-
158-
# For each benchmark, run comparison
159-
for benchmark in "${BENCHMARKS[@]}"; do
160-
echo "Comparing $benchmark results..."
161-
162-
# Determine result file names based on benchmark
163-
if [[ "$benchmark" == "tpch" ]]; then
164-
RESULT_FILE="tpch_sf1.json"
165-
elif [[ "$benchmark" == "tpch_mem" ]]; then
166-
RESULT_FILE="tpch_mem_sf1.json"
167-
elif [[ "$benchmark" == "tpch10" ]]; then
168-
RESULT_FILE="tpch_sf10.json"
169-
elif [[ "$benchmark" == "tpch_mem10" ]]; then
170-
RESULT_FILE="tpch_mem_sf10.json"
171-
elif [[ "$benchmark" == "clickbench_1" ]]; then
172-
RESULT_FILE="clickbench_1.json"
173-
elif [[ "$benchmark" == "clickbench_partitioned" ]]; then
174-
RESULT_FILE="clickbench_partitioned.json"
175-
elif [[ "$benchmark" == "clickbench_extended" ]]; then
176-
RESULT_FILE="clickbench_extended.json"
177-
elif [[ "$benchmark" == "imdb" ]]; then
178-
RESULT_FILE="imdb.json"
179-
elif [[ "$benchmark" == "external_aggr" ]]; then
180-
RESULT_FILE="external_aggr.json"
181-
elif [[ "$benchmark" == "sort_tpch" ]]; then
182-
RESULT_FILE="sort_tpch.json"
183-
else
184-
RESULT_FILE="$benchmark.json"
185-
fi
186-
187-
# Check if both result files exist
188-
if [[ -f "$PR_RESULTS_DIR/$RESULT_FILE" && -f "$BASE_RESULTS_DIR/$RESULT_FILE" ]]; then
189-
# Run comparison and capture output
190-
OUTPUT=$(python compare.py "$PR_RESULTS_DIR/$RESULT_FILE" "$BASE_RESULTS_DIR/$RESULT_FILE")
191-
COMPARISON_RESULTS+="## $benchmark\n\n\`\`\`\n$OUTPUT\n\`\`\`\n\n"
192-
else
193-
COMPARISON_RESULTS+="## $benchmark\n\nResults not available for comparison.\n\n"
194-
fi
195-
done
196-
197-
# Save comparison results to file for use in PR comment
198-
echo -e "$COMPARISON_RESULTS" > /tmp/benchmark_comparison.txt
130+
# Run the comparison script
131+
./bench.sh compare HEAD "${BENCH_BRANCH_NAME}" | tee -a /tmp/report.txt
199132
200133
- name: Post results as PR comment
201134
uses: actions/github-script@v7
@@ -210,7 +143,7 @@ jobs:
210143
const comment_id = ${{ github.event.inputs.comment_id }};
211144
212145
// Read comparison results
213-
const comparisonText = fs.readFileSync('/tmp/benchmark_comparison.txt', 'utf8');
146+
const comparisonText = fs.readFileSync('/tmp/report.txt', 'utf8');
214147
215148
// Parse benchmarks from input
216149
const benchmarks = '${{ github.event.inputs.benchmarks }}'.split(' ');
@@ -230,7 +163,7 @@ jobs:
230163
231164
Triggered by [this comment](https://github.com/\${context.repo.owner}/\${context.repo.repo}/pull/\${pr_number}#issuecomment-\${comment_id})
232165
`;
233-
166+
234167
// Post comment to PR
235168
await github.rest.issues.createComment({
236169
owner: context.repo.owner,

0 commit comments

Comments
 (0)