.github/workflows/flash_attention.yml: 1 file changed, +46 −0 lines changed
+name: Flash Attention Benchmark
+
+# To run on every commit to the FA repo, we need to add the following trigger to the FA repo:
+# run: |
+#   curl -XPOST -u "${{ secrets.PAT_USERNAME }}:${{ secrets.PAT_TOKEN }}" -H "Accept: application/vnd.github.everest-preview+json" -H "Content-Type: application/json" https://api.github.com/repos/pytorch/pytorch-integration-testing/dispatches --data '{"event_type": "benchmark_flash_attention"}'
+
+on:
+  schedule:
+    # Run every 2 hours
+    - cron: '0 */2 * * *'
+  push:
+    paths:
+      - .github/workflows/flash_attention.yml
+  repository_dispatch:
+    types: benchmark_flash_attention
+  workflow_dispatch:
+jobs:
+  benchmark-flash-attn:
+    name: Flash Attention CuTe DSL Benchmark
+    runs-on: B200
+    container:
+      # https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch/
+      image: nvcr.io/nvidia/pytorch:25.06-py3
+      options: --gpus all --ipc=host --ulimit memlock=-1 --ulimit stack=67108864
+    steps:
+      - name: Build and Run FlashAttention CuTe DSL
+        run: |
+          set -x
+          export CUDA_VISIBLE_DEVICES=0
+          echo "Installing nvidia-cutlass-dsl"
+          pip install nvidia-cutlass-dsl==4.1.0.dev0
+          echo "Installing Flash Attention"
+          rm -fr fa4
+          git clone https://github.com/Dao-AILab/flash-attention.git fa4
+          pushd fa4
+          pwd
+          git log -1
+          python setup.py install
+          export PYTHONPATH=$(pwd)
+
+          echo '<h1>B200 1000W</h1>' >> $GITHUB_STEP_SUMMARY
+          nvidia-smi
+          python benchmarks/benchmark_attn.py >> $GITHUB_STEP_SUMMARY
+
+          popd
+          rm -fr fa4
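
The comment at the top of the workflow notes that benchmarking every commit to the flash-attention repo would require adding a dispatch trigger on the FA side. As a rough sketch (not part of this PR; the branch filter, job and step names are assumptions, with the curl command taken from that comment), such a sender workflow in the FA repo could look like:

name: Trigger Flash Attention Benchmark

on:
  push:
    branches: [main]  # assumption: fire on every commit to the default branch

jobs:
  dispatch:
    runs-on: ubuntu-latest
    steps:
      - name: Fire repository_dispatch to pytorch/pytorch-integration-testing
        run: |
          # Same curl call as in the workflow comment above, split across lines for readability
          curl -XPOST \
            -u "${{ secrets.PAT_USERNAME }}:${{ secrets.PAT_TOKEN }}" \
            -H "Accept: application/vnd.github.everest-preview+json" \
            -H "Content-Type: application/json" \
            https://api.github.com/repos/pytorch/pytorch-integration-testing/dispatches \
            --data '{"event_type": "benchmark_flash_attention"}'

The event_type in the payload must match the repository_dispatch types filter in the benchmark workflow (benchmark_flash_attention) for the run to be triggered.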