26
26
pr_head_sha:
27
27
description: ' PR Head SHA'
28
28
required: true
29
+ pr_branch:
30
+ description: ' PR Branch'
31
+ required: true
29
32
base_branch:
30
33
description: ' Base branch to compare against (usually main)'
31
34
required: true
@@ -79,123 +82,53 @@ jobs:
79
82
80
83
- name: Generate benchmark data
81
84
run: |
82
- # Run data generation for each benchmark
83
- cd pr_branch/benchmarks
84
-
85
- # Parse benchmarks from input
86
- IFS=' ' read -r -a BENCHMARKS <<< "${{ github.event.inputs.benchmarks }}"
85
+ ### Command used to pre-warm (aka precompile) the directories
86
+ export CARGO_COMMAND="cargo run --release"
87
87
88
- # Generate data for each benchmark
89
- for benchmark in "${BENCHMARKS[@]}"; do
90
- echo "Generating data for $benchmark..."
91
- ./bench.sh data "$benchmark"
92
- done
93
-
94
- - name : Run PR branch benchmarks
95
- id : pr_benchmarks
96
- run : |
97
- # Navigate to PR branch
88
+ # start compiling the branch (in the background)
98
89
cd pr_branch/benchmarks
99
-
100
- # Parse benchmarks from input
101
- IFS=' ' read -r -a BENCHMARKS <<< "${{ github.event.inputs.benchmarks }}"
102
-
103
- # Use the branch name as results name
104
- BRANCH_NAME=$(git rev-parse --abbrev-ref HEAD)
105
- BRANCH_NAME=${BRANCH_NAME//\//_}
106
-
107
- # Run each benchmark
108
- for benchmark in "${BENCHMARKS[@]}"; do
109
- echo "Running $benchmark on PR branch..."
110
- RESULTS_NAME="$BRANCH_NAME" ./bench.sh run "$benchmark"
111
- done
112
-
113
- echo "pr_results_dir=pr_branch/benchmarks/results/$BRANCH_NAME" >> $GITHUB_OUTPUT
114
-
115
- - name : Run base branch benchmarks
116
- id : base_benchmarks
117
- run : |
118
- # Navigate to base branch
90
+ export BRANCH_NAME="$(git rev-parse --abbrev-ref HEAD)"
91
+ ${CARGO_COMMAND} --bin tpch >> build.log 2>&1 &
92
+ ${CARGO_COMMAND} --bin parquet >> build.log 2>&1 &
93
+ ${CARGO_COMMAND} --bin dfbench >> build.log 2>&1 &
94
+ # Go back to the directory we came from. The directory was entered with a
+ # plain `cd`, so the directory stack is empty and `popd` would fail here.
+ cd - > /dev/null
119
95
cd base_branch/benchmarks
120
-
121
- # Parse benchmarks from input
122
- IFS=' ' read -r -a BENCHMARKS <<< "${{ github.event.inputs.benchmarks }}"
123
-
124
- # Use 'base_branch' as results name
125
- BRANCH_NAME="base_${BRANCH_NAME:-main}"
126
- BRANCH_NAME=${BRANCH_NAME//\//_}
127
-
128
- # Run each benchmark
96
+ ${CARGO_COMMAND} --bin tpch >> build.log 2>&1 &
97
+ ${CARGO_COMMAND} --bin parquet >> build.log 2>&1 &
98
+ ${CARGO_COMMAND} --bin dfbench >> build.log 2>&1 &
99
+ # Same as above: return to the previous directory without relying on popd.
+ cd - > /dev/null
100
+
101
+ # Wait for the compilation to finish
102
+ wait
103
+ # Check if the compilation was successful
104
+ if grep -q "error" build.log; then
105
+ echo "Compilation failed. Check build.log for details."
106
+ exit 1
107
+ fi
108
+ echo "Compilation completed successfully."
109
+
110
+ # Set up the benchmarks in the base branch
111
+ cd base_branch/benchmarks
112
+ # Parse the space-separated benchmark list from the workflow input; the loop
+ # below iterates over this array and nothing else in the script defines it.
+ IFS=' ' read -r -a BENCHMARKS <<< "${{ github.event.inputs.benchmarks }}"
+ # Download data for each benchmark
129
113
for benchmark in "${BENCHMARKS[@]}"; do
130
- echo "Running $benchmark on base branch..."
131
- RESULTS_NAME="$BRANCH_NAME" ./bench.sh run "$benchmark"
114
+ echo "** Creating data if needed **"
115
+ ./bench.sh data "$benchmark"
116
+ echo "** Running $benchmark baseline (merge-base from main)... **"
117
+ export DATAFUSION_DIR="${GITHUB_WORKSPACE}/base_branch"
118
+ ./bench.sh run "$benchmark"
119
+ ## Run against branch
120
+ echo "** Running $benchmark branch... **"
121
+ export DATAFUSION_DIR="${GITHUB_WORKSPACE}/pr_branch"
122
+ ./bench.sh run "$benchmark"
132
123
done
133
-
134
- echo "base_results_dir=base_branch/benchmarks/results/$BRANCH_NAME" >> $GITHUB_OUTPUT
135
124
136
- - name : Install comparison requirements
137
- run : |
138
- # Setup virtual environment with requirements
139
- cd pr_branch/benchmarks
125
+ ## Compare
126
+ rm -f /tmp/report.txt
127
+ # Take the PR branch name from the workflow input, then replace every / with _
+ # before it is handed to the compare command.
+ BENCH_BRANCH_NAME='${{ github.event.inputs.pr_branch }}'
+ export BENCH_BRANCH_NAME="${BENCH_BRANCH_NAME//\//_}" # mind blowing syntax to replace / with _
128
+ # Install requirements for comparison
140
129
pip install -r requirements.txt
141
-
142
- - name : Compare benchmark results
143
- id : compare
144
- run : |
145
- # Navigate to PR branch benchmark directory
146
- cd pr_branch/benchmarks
147
-
148
- # Parse benchmarks from input
149
- IFS=' ' read -r -a BENCHMARKS <<< "${{ github.event.inputs.benchmarks }}"
150
-
151
- # Initialize results variable
152
- COMPARISON_RESULTS=""
153
-
154
- # Get the directory names
155
- PR_RESULTS_DIR="${{ steps.pr_benchmarks.outputs.pr_results_dir }}"
156
- BASE_RESULTS_DIR="${{ steps.base_benchmarks.outputs.base_results_dir }}"
157
-
158
- # For each benchmark, run comparison
159
- for benchmark in "${BENCHMARKS[@]}"; do
160
- echo "Comparing $benchmark results..."
161
-
162
- # Determine result file names based on benchmark
163
- if [[ "$benchmark" == "tpch" ]]; then
164
- RESULT_FILE="tpch_sf1.json"
165
- elif [[ "$benchmark" == "tpch_mem" ]]; then
166
- RESULT_FILE="tpch_mem_sf1.json"
167
- elif [[ "$benchmark" == "tpch10" ]]; then
168
- RESULT_FILE="tpch_sf10.json"
169
- elif [[ "$benchmark" == "tpch_mem10" ]]; then
170
- RESULT_FILE="tpch_mem_sf10.json"
171
- elif [[ "$benchmark" == "clickbench_1" ]]; then
172
- RESULT_FILE="clickbench_1.json"
173
- elif [[ "$benchmark" == "clickbench_partitioned" ]]; then
174
- RESULT_FILE="clickbench_partitioned.json"
175
- elif [[ "$benchmark" == "clickbench_extended" ]]; then
176
- RESULT_FILE="clickbench_extended.json"
177
- elif [[ "$benchmark" == "imdb" ]]; then
178
- RESULT_FILE="imdb.json"
179
- elif [[ "$benchmark" == "external_aggr" ]]; then
180
- RESULT_FILE="external_aggr.json"
181
- elif [[ "$benchmark" == "sort_tpch" ]]; then
182
- RESULT_FILE="sort_tpch.json"
183
- else
184
- RESULT_FILE="$benchmark.json"
185
- fi
186
-
187
- # Check if both result files exist
188
- if [[ -f "$PR_RESULTS_DIR/$RESULT_FILE" && -f "$BASE_RESULTS_DIR/$RESULT_FILE" ]]; then
189
- # Run comparison and capture output
190
- OUTPUT=$(python compare.py "$PR_RESULTS_DIR/$RESULT_FILE" "$BASE_RESULTS_DIR/$RESULT_FILE")
191
- COMPARISON_RESULTS+="## $benchmark\n\n\`\`\`\n$OUTPUT\n\`\`\`\n\n"
192
- else
193
- COMPARISON_RESULTS+="## $benchmark\n\nResults not available for comparison.\n\n"
194
- fi
195
- done
196
-
197
- # Save comparison results to file for use in PR comment
198
- echo -e "$COMPARISON_RESULTS" > /tmp/benchmark_comparison.txt
130
+ # Run the comparison script
131
+ ./bench.sh compare HEAD "${BENCH_BRANCH_NAME}" | tee -a /tmp/report.txt
199
132
200
133
- name: Post results as PR comment
201
134
uses: actions/github-script@v7
@@ -210,7 +143,7 @@ jobs:
210
143
const comment_id = ${{ github.event.inputs.comment_id }};
211
144
212
145
// Read comparison results
213
- const comparisonText = fs.readFileSync('/tmp/benchmark_comparison .txt', 'utf8');
146
+ const comparisonText = fs.readFileSync('/tmp/report .txt', 'utf8');
214
147
215
148
// Parse benchmarks from input
216
149
const benchmarks = '${{ github.event.inputs.benchmarks }}'.split(' ');
@@ -230,7 +163,7 @@ jobs:
230
163
231
164
Triggered by [this comment](https://github.com/\${context.repo.owner}/\${context.repo.repo}/pull/\${pr_number}#issuecomment-\${comment_id})
232
165
`;
233
-
166
+
234
167
// Post comment to PR
235
168
await github.rest.issues.createComment({
236
169
owner: context.repo.owner,
0 commit comments