
Commit 149bc71

Flash Attention Benchmarking on B200

Check out the FA repo and run the benchmark as part of this action.

1 parent 4e34d94 commit 149bc71
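
The commented-out curl call in the workflow below sketches how the FA repo itself could kick off this benchmark on every commit. A hypothetical workflow on the Dao-AILab/flash-attention side (not part of this commit; the repo slug, event type, and PAT secrets are taken from that comment):

# Hypothetical FA-side trigger workflow (illustration only)
name: Trigger B200 benchmark
on:
  push:
    branches: [main]
jobs:
  dispatch:
    runs-on: ubuntu-latest
    steps:
      - name: Fire repository_dispatch on pytorch-integration-testing
        run: |
          curl -XPOST -u "${{ secrets.PAT_USERNAME }}:${{ secrets.PAT_TOKEN }}" \
            -H "Accept: application/vnd.github.everest-preview+json" \
            -H "Content-Type: application/json" \
            https://api.github.com/repos/pytorch/pytorch-integration-testing/dispatches \
            --data '{"event_type": "benchmark_flash_attention"}'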

File tree

1 file changed: +48 -0 lines changed


.github/workflows/flash_attention.yml

Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,48 @@
name: Flash Attention Benchmark

# attempt 0.1
# Run on every commit to the FA repo
# FA repo will run this on every commit
# run: |
#   curl -XPOST -u "${{ secrets.PAT_USERNAME}}:${{secrets.PAT_TOKEN}}" -H "Accept: application/vnd.github.everest-preview+json" -H "Content-Type: application/json" https://api.github.com/repos/pytorch/pytorch-integration-testing/dispatches --data '{"event_type": "benchmark_flash_attention"}'

on:
  schedule:
    # Run every 2 hours
    - cron: '0 */2 * * *'
  push:
    paths:
      - .github/workflows/flash_attention.yml
  workflow_dispatch: # Allow manual triggering
  repository_dispatch:
    types: benchmark_flash_attention
jobs:
  benchmark-flash-attn:
    name: Flash Attention CuTe DSL Benchmark
    runs-on: B200
    container:
      # https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch/
      image: nvcr.io/nvidia/pytorch:25.06-py3
      options: --gpus all --ipc=host --ulimit memlock=-1 --ulimit stack=67108864
    steps:
      - name: CuTe DSL
        #with:
        #  runner: linux.g5.48xlarge.nvidia.gpu
        #  gpu-arch-type: cuda
        #  gpu-arch-version: "12.9.1"
        #  timeout: 600
        run: |
          set -x
          export CUDA_VISIBLE_DEVICES=0
          echo "Installing nvidia-cutlass-dsl"
          pip install nvidia-cutlass-dsl==4.1.0.dev0
          echo "Installing Flash Attention"
          git clone https://github.com/Dao-AILab/flash-attention.git fa4
          pushd fa4
          git log -1
          python setup.py build
          python setup.py install
          export PYTHONPATH=$(pwd)
          nvidia-smi
          python benchmarks/benchmark_attn.py >> $GITHUB_STEP_SUMMARY
          popd
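
Since the workflow declares workflow_dispatch and repository_dispatch triggers, a run can also be started by hand without waiting for the two-hour cron schedule. A minimal sketch using the GitHub CLI (the pytorch/pytorch-integration-testing slug is assumed from the commented-out dispatch call above):

# Trigger the workflow directly (workflow_dispatch)
gh workflow run flash_attention.yml --repo pytorch/pytorch-integration-testing

# Or fire the repository_dispatch event this workflow listens for
gh api repos/pytorch/pytorch-integration-testing/dispatches \
  -f event_type=benchmark_flash_attention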

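The job can also be approximated locally for debugging, since it only needs the NGC container and a GPU. A rough sketch, assuming Docker with the NVIDIA Container Toolkit and a local NVIDIA GPU (a B200 to match CI), mirroring the run step above minus the step-summary redirect:

# Run the same container with the same options as the workflow's container: block
docker run --rm --gpus all --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 \
  nvcr.io/nvidia/pytorch:25.06-py3 bash -c '
    set -x
    export CUDA_VISIBLE_DEVICES=0
    pip install nvidia-cutlass-dsl==4.1.0.dev0
    git clone https://github.com/Dao-AILab/flash-attention.git fa4
    cd fa4
    python setup.py build && python setup.py install
    export PYTHONPATH=$(pwd)
    python benchmarks/benchmark_attn.py
  '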