[CI][Distributed][CUDA][Symm-Mem] Enable B200 Symm Mem Test (#162988) #1
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # This workflow hosts slow jobs that run only periodically because | |
| # they are too slow to run on every commit. The list of slow tests can be found in | |
| # https://github.com/pytorch/test-infra/blob/generated-stats/stats/slow-tests.json | |
| # Workflow-level settings: display name, triggers, concurrency and token permissions. | |
| name: slow | |
| # Triggers: pushes to main and release/* branches, pushes of ciflow/slow/* tags, | |
| # a daily cron schedule, and manual dispatch. | |
| on: | |
| push: | |
| branches: | |
| - main | |
| - release/* | |
| tags: | |
| - ciflow/slow/* | |
| schedule: | |
| - cron: 29 8 * * * # about 1:29am PDT, for mem leak check and rerun disabled tests | |
| workflow_dispatch: | |
| # The group key combines the workflow name, the PR number (or ref name), the commit SHA | |
| # when the ref is a branch, whether the event is a manual dispatch or a schedule, and the | |
| # schedule expression. A new run with the same key cancels the run still in progress. | |
| concurrency: | |
| group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}-${{ github.event.schedule }} | |
| cancel-in-progress: true | |
| # Token permissions for the workflow: write for id-token, read for contents. | |
| permissions: | |
| id-token: write | |
| contents: read | |
| jobs: | |
| # Calls the reusable llm_td_retrieval.yml workflow. Runs only when the repository owner | |
| # is 'pytorch'. Displayed under the name "before-test". | |
| llm-td: | |
| if: github.repository_owner == 'pytorch' | |
| name: before-test | |
| uses: ./.github/workflows/llm_td_retrieval.yml | |
| permissions: | |
| id-token: write | |
| contents: read | |
| # Calls the reusable target_determination.yml workflow after llm-td has finished. | |
| # Uses the same display name ("before-test") as llm-td. The test jobs in this file | |
| # list this job in their `needs`. | |
| target-determination: | |
| name: before-test | |
| uses: ./.github/workflows/target_determination.yml | |
| needs: llm-td | |
| permissions: | |
| id-token: write | |
| contents: read | |
| # Calls _runner-determinator.yml from pytorch/pytorch at ref `main`. The build jobs in | |
| # this file read its `label-type` output and use it as a runner prefix. | |
| # Runs only when the repository owner is 'pytorch'; for schedule events the repository | |
| # must additionally be exactly pytorch/pytorch. | |
| get-label-type: | |
| name: get-label-type | |
| uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main | |
| if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }} | |
| with: | |
| triggering_actor: ${{ github.triggering_actor }} | |
| issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} | |
| curr_branch: ${{ github.head_ref || github.ref_name }} | |
| curr_ref_type: ${{ github.ref_type }} | |
| # Build job for the CUDA 12.8 / py3.10 / gcc11 sm86 environment, delegated to the | |
| # reusable _linux-build.yml workflow with cuda-arch-list 8.6. Declares a 3-shard | |
| # "slow" test matrix on linux.g5.4xlarge.nvidia.gpu runners. | |
| # NOTE(review): the matrix runner labels here do not carry the label-type prefix that | |
| # the clang18-asan matrix in this file uses - confirm this is intentional. | |
| linux-jammy-cuda12_8-py3_10-gcc11-sm86-build: | |
| name: linux-jammy-cuda12.8-py3.10-gcc11-sm86 | |
| uses: ./.github/workflows/_linux-build.yml | |
| needs: get-label-type | |
| with: | |
| runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | |
| build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm86 | |
| docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11 | |
| cuda-arch-list: 8.6 | |
| test-matrix: | | |
| { include: [ | |
| { config: "slow", shard: 1, num_shards: 3, runner: "linux.g5.4xlarge.nvidia.gpu" }, | |
| { config: "slow", shard: 2, num_shards: 3, runner: "linux.g5.4xlarge.nvidia.gpu" }, | |
| { config: "slow", shard: 3, num_shards: 3, runner: "linux.g5.4xlarge.nvidia.gpu" }, | |
| ]} | |
| secrets: inherit | |
| # Test job for the sm86 CUDA build, delegated to the reusable _linux-test.yml workflow. | |
| # Waits for the matching build job and for target-determination, then reuses the | |
| # docker-image and test-matrix outputs of the build job. | |
| linux-jammy-cuda12_8-py3_10-gcc11-sm86-test: | |
| name: linux-jammy-cuda12.8-py3.10-gcc11-sm86 | |
| uses: ./.github/workflows/_linux-test.yml | |
| needs: | |
| - linux-jammy-cuda12_8-py3_10-gcc11-sm86-build | |
| - target-determination | |
| with: | |
| build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm86 | |
| docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-sm86-build.outputs.docker-image }} | |
| test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-sm86-build.outputs.test-matrix }} | |
| secrets: inherit | |
| # Build job for the py3.10 / clang12 environment, delegated to the reusable | |
| # _linux-build.yml workflow. Declares a 2-shard "slow" test matrix on linux.2xlarge runners. | |
| # NOTE(review): the matrix runner labels here do not carry the label-type prefix that | |
| # the clang18-asan matrix in this file uses - confirm this is intentional. | |
| linux-jammy-py3_10-clang12-build: | |
| name: linux-jammy-py3.10-clang12 | |
| uses: ./.github/workflows/_linux-build.yml | |
| needs: get-label-type | |
| with: | |
| runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | |
| build-environment: linux-jammy-py3.10-clang12 | |
| docker-image-name: ci-image:pytorch-linux-jammy-py3.10-clang12 | |
| test-matrix: | | |
| { include: [ | |
| { config: "slow", shard: 1, num_shards: 2, runner: "linux.2xlarge" }, | |
| { config: "slow", shard: 2, num_shards: 2, runner: "linux.2xlarge" }, | |
| ]} | |
| secrets: inherit | |
| # Test job for the clang12 build, delegated to the reusable _linux-test.yml workflow. | |
| # Waits for the matching build job and for target-determination, then reuses the | |
| # docker-image and test-matrix outputs of the build job. | |
| linux-jammy-py3_10-clang12-test: | |
| name: linux-jammy-py3.10-clang12 | |
| uses: ./.github/workflows/_linux-test.yml | |
| needs: | |
| - linux-jammy-py3_10-clang12-build | |
| - target-determination | |
| with: | |
| build-environment: linux-jammy-py3.10-clang12 | |
| docker-image: ${{ needs.linux-jammy-py3_10-clang12-build.outputs.docker-image }} | |
| test-matrix: ${{ needs.linux-jammy-py3_10-clang12-build.outputs.test-matrix }} | |
| secrets: inherit | |
| # Build job for the ROCm / py3.10 environment, delegated to the reusable | |
| # _linux-build.yml workflow. Declares a 2-shard "slow" test matrix on linux.rocm.gpu.2 | |
| # runners; each matrix entry also carries owners: ["module:rocm"]. | |
| linux-jammy-rocm-py3_10-build: | |
| name: linux-jammy-rocm-py3.10 | |
| uses: ./.github/workflows/_linux-build.yml | |
| needs: get-label-type | |
| with: | |
| runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | |
| build-environment: linux-jammy-rocm-py3.10 | |
| docker-image-name: ci-image:pytorch-linux-jammy-rocm-n-py3 | |
| test-matrix: | | |
| { include: [ | |
| { config: "slow", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.2", owners: ["module:rocm"] }, | |
| { config: "slow", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.2", owners: ["module:rocm"] }, | |
| ]} | |
| secrets: inherit | |
| # Test job for the ROCm build. Unlike the other test jobs in this file it calls | |
| # _rocm-test.yml and declares its own job-level permissions. Waits for the matching | |
| # build job and for target-determination, then reuses the docker-image and test-matrix | |
| # outputs of the build job. | |
| linux-jammy-rocm-py3_10-test: | |
| permissions: | |
| id-token: write | |
| contents: read | |
| name: linux-jammy-rocm-py3.10 | |
| uses: ./.github/workflows/_rocm-test.yml | |
| needs: | |
| - linux-jammy-rocm-py3_10-build | |
| - target-determination | |
| with: | |
| build-environment: linux-jammy-rocm-py3.10 | |
| docker-image: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.docker-image }} | |
| test-matrix: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.test-matrix }} | |
| secrets: inherit | |
| # Build job for the py3.10 / clang18 ASAN environment, delegated to the reusable | |
| # _linux-build.yml workflow. Overrides the build runner with linux.2xlarge.memory and | |
| # declares a 3-shard "slow" test matrix whose runner labels are the label-type output | |
| # followed by linux.4xlarge. | |
| # sync-tag is passed to the reusable workflow; its meaning is defined there, not in this file. | |
| linux-jammy-py3_10-clang18-asan-build: | |
| name: linux-jammy-py3.10-clang18-asan | |
| uses: ./.github/workflows/_linux-build.yml | |
| needs: get-label-type | |
| with: | |
| # More memory is needed to build with asan | |
| runner: linux.2xlarge.memory | |
| runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | |
| build-environment: linux-jammy-py3.10-clang18-asan | |
| docker-image-name: ci-image:pytorch-linux-jammy-py3-clang18-asan | |
| test-matrix: | | |
| { include: [ | |
| { config: "slow", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" }, | |
| { config: "slow", shard: 2, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" }, | |
| { config: "slow", shard: 3, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" }, | |
| ]} | |
| sync-tag: asan-build | |
| secrets: inherit | |
| # Test job for the clang18 ASAN build, delegated to the reusable _linux-test.yml workflow. | |
| # Waits for the matching build job and for target-determination, then reuses the | |
| # docker-image and test-matrix outputs of the build job. | |
| # sync-tag is passed to the reusable workflow; its meaning is defined there, not in this file. | |
| linux-jammy-py3_10-clang18-asan-test: | |
| name: linux-jammy-py3.10-clang18-asan | |
| uses: ./.github/workflows/_linux-test.yml | |
| needs: | |
| - linux-jammy-py3_10-clang18-asan-build | |
| - target-determination | |
| with: | |
| build-environment: linux-jammy-py3.10-clang18-asan | |
| docker-image: ${{ needs.linux-jammy-py3_10-clang18-asan-build.outputs.docker-image }} | |
| test-matrix: ${{ needs.linux-jammy-py3_10-clang18-asan-build.outputs.test-matrix }} | |
| sync-tag: asan-test | |
| secrets: inherit | |