File tree Expand file tree Collapse file tree 1 file changed +44
-0
lines changed Expand file tree Collapse file tree 1 file changed +44
-0
lines changed Original file line number Diff line number Diff line change
---
# GitHub Actions workflow: nightly Flash Attention (CuTe DSL) benchmark on a B200 runner.
name: Flash Attention Benchmark

# To remotely trigger a FA Benchmarking run, use the following:
# curl -XPOST -H "Accept: application/vnd.github.v3+json" -H "Content-Type: application/json" https://api.github.com/repos/pytorch/pytorch-integration-testing/dispatches --data '{"event_type": "benchmark_flash_attention"}'

on:
  schedule:
    - cron: "0 6 * * *"  # Run every day at 6AM (UTC)
  push:
    paths:
      - .github/workflows/flash_attention.yml
  repository_dispatch:
    types: [benchmark_flash_attention]
  workflow_dispatch:

jobs:
  benchmark-flash-attn:
    name: Flash Attention CuTe DSL Benchmark
    runs-on: B200  # self-hosted runner label — TODO confirm label exists in this org
    container:
      # https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch/
      image: nvcr.io/nvidia/pytorch:25.06-py3
      options: --gpus all --ipc=host --ulimit memlock=-1 --ulimit stack=67108864
    steps:
      # Check out the flash-attention sources into ./fa4 (not this repo).
      - uses: actions/checkout@v4
        with:
          repository: 'Dao-AILab/flash-attention'
          path: 'fa4'
      - name: Install CuTe DSL
        run: |
          set -x
          echo "Installing nvidia-cutlass-dsl"
          pip install nvidia-cutlass-dsl==4.1.0.dev0
      - name: Build and Run FlashAttention CuTe DSL
        run: |
          set -x
          pushd fa4
          python setup.py install

          # Benchmark output is appended to the job summary page.
          echo '<h1>B200 1000W</h1>' >> $GITHUB_STEP_SUMMARY
          nvidia-smi
          export PYTHONPATH=$(pwd)
          python benchmarks/benchmark_attn.py >> $GITHUB_STEP_SUMMARY

          popd
You can’t perform that action at this time.
0 commit comments