Commit 7536545

Flash Attention Benchmarking on B200
Check out the FA repo and run the benchmark as part of this action
1 parent 4e34d94 commit 7536545
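
For context, the workflow added below amounts to the following manual steps; a minimal local sketch, assuming a B200 (or other recent NVIDIA GPU) machine with a CUDA-enabled PyTorch environment already set up:

    # Install the CuTe DSL wheel the workflow uses
    pip install nvidia-cutlass-dsl==4.1.0.dev0
    # Check out FlashAttention and build it from source
    git clone https://github.com/Dao-AILab/flash-attention fa4
    cd fa4
    python setup.py install
    # Run the attention benchmark from the repo root
    export PYTHONPATH=$(pwd)
    python benchmarks/benchmark_attn.py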

File tree

1 file changed: +44 −0 lines changed


.github/workflows/flash_attention.yml

Lines changed: 44 additions & 0 deletions
@@ -0,0 +1,44 @@
+name: Flash Attention Benchmark
+
+# To remotely trigger a FA Benchmarking run, use the following:
+# curl -XPOST -H "Accept: application/vnd.github.v3+json" -H "Content-Type: application/json" https://api.github.com/repos/pytorch/pytorch-integration-testing/dispatches --data '{"event_type": "benchmark_flash_attention"}'
+
+on:
+  schedule:
+    - cron: "0 6 * * *" # Run every day at 6AM
+  push:
+    paths:
+      - .github/workflows/flash_attention.yml
+  repository_dispatch:
+    types: benchmark_flash_attention
+  workflow_dispatch:
+jobs:
+  benchmark-flash-attn:
+    name: Flash Attention CuTe DSL Benchmark
+    runs-on: B200
+    container:
+      # https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch/
+      image: nvcr.io/nvidia/pytorch:25.06-py3
+      options: --gpus all --ipc=host --ulimit memlock=-1 --ulimit stack=67108864
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          repository: 'Dao-AILab/flash-attention'
+          path: 'fa4'
+      - name: Install CuTe DSL
+        run: |
+          set -x
+          echo "Installing nvidia-cutlass-dsl"
+          pip install nvidia-cutlass-dsl==4.1.0.dev0
+      - name: Build and Run FlashAttention CuTe DSL
+        run: |
+          set -x
+          pushd fa4
+          python setup.py install
+
+          echo '<h1>B200 1000W</h1>' >> $GITHUB_STEP_SUMMARY
+          nvidia-smi
+          export PYTHONPATH=$(pwd)
+          python benchmarks/benchmark_attn.py >> $GITHUB_STEP_SUMMARY
+
+          popd
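
Note that the curl command in the workflow's header comment is unauthenticated as written; the GitHub repository_dispatch endpoint rejects anonymous requests. A minimal sketch of the authenticated form, assuming a personal access token with repo scope in a GITHUB_TOKEN environment variable (the variable name here is illustrative):

    # Fire the repository_dispatch event this workflow listens for
    curl -X POST \
      -H "Accept: application/vnd.github.v3+json" \
      -H "Authorization: Bearer $GITHUB_TOKEN" \
      https://api.github.com/repos/pytorch/pytorch-integration-testing/dispatches \
      -d '{"event_type": "benchmark_flash_attention"}'

The event_type in the payload must match the types value under repository_dispatch in the workflow for the run to trigger.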
