File tree: 1 file changed, +48 −0 lines changed
Original file line number / Diff line number / Diff line change
---
# Benchmarks Flash Attention (CuTe DSL) on a B200 runner inside the NVIDIA
# PyTorch NGC container, publishing results to the job step summary.
name: Flash Attention Benchmark

# attempt 0.1
# Run on every commit to the FA repo
# FA repo will run this on every commit
# run: |
#   curl -XPOST -u "${{ secrets.PAT_USERNAME}}:${{secrets.PAT_TOKEN}}" -H "Accept: application/vnd.github.everest-preview+json" -H "Content-Type: application/json" https://api.github.com/repos/pytorch/pytorch-integration-testing/dispatches --data '{"event_type": "benchmark_flash_attention"}'

on:
  schedule:
    # Run every 2 hours (no leading space inside the cron string —
    # GitHub's cron parser is strict about the five-field format)
    - cron: '0 */2 * * *'
  push:
    paths:
      - .github/workflows/flash_attention.yml
  workflow_dispatch:  # Allow manual triggering
  repository_dispatch:
    # `types` is documented as a sequence of event_type names
    types: [benchmark_flash_attention]

jobs:
  benchmark-flash-attn:
    name: Flash Attention CuTe DSL Benchmark
    runs-on: B200
    container:
      # https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch/
      image: nvcr.io/nvidia/pytorch:25.06-py3
      options: --gpus all --ipc=host --ulimit memlock=-1 --ulimit stack=67108864
    steps:
      - name: CuTe DSL
        # with:
        #   runner: linux.g5.48xlarge.nvidia.gpu
        #   gpu-arch-type: cuda
        #   gpu-arch-version: "12.9.1"
        #   timeout: 600
        run: |
          # -e/-u make the step fail fast on any command error or unset
          # variable instead of silently continuing to the benchmark;
          # -x keeps the original command tracing.
          set -eux
          export CUDA_VISIBLE_DEVICES=0
          echo "Installing nvidia-cutlass-dsl"
          pip install nvidia-cutlass-dsl==4.1.0.dev0
          echo "Installing Flash Attention"
          git clone https://github.com/Dao-AILab/flash-attention.git fa4
          pushd fa4
          git log -1
          python setup.py build
          python setup.py install
          # Quote expansions so paths with spaces cannot word-split.
          export PYTHONPATH="$(pwd)"
          nvidia-smi
          python benchmarks/benchmark_attn.py >> "$GITHUB_STEP_SUMMARY"
          popd
You can’t perform that action at this time.
0 commit comments