Skip to content

Create gpu-e2e-test.yml #170

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 9, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 130 additions & 0 deletions .github/workflows/gpu-e2e-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

name: Run GPU E2E Test
env:
TERRAFORM_AWS_ASSUME_ROLE: ${{ secrets.TERRAFORM_AWS_ASSUME_ROLE }}

on:
workflow_dispatch:
inputs:
addon_name:
required: true
type: string
default: "amazon-cloudwatch-observability"
description: "GPU E2E Test"
addon_version:
required: true
type: string
default: "v1.1.0-eksbuild.1"
description: "EKS addon version"
run_in_beta:
required: true
type: boolean
default: true
description: "Run in EKS Addon Beta environment"

concurrency:
group: ${{ github.workflow }}-${{ github.ref_name }}
cancel-in-progress: true

permissions:
id-token: write
contents: read

jobs:
GenerateTestMatrix:
name: 'GenerateTestMatrix'
runs-on: ubuntu-latest
outputs:
eks_addon_matrix: ${{ steps.set-matrix.outputs.eks_addon_matrix }}
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0

- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v2
with:
role-to-assume: ${{ env.TERRAFORM_AWS_ASSUME_ROLE }}
aws-region: us-west-2

- name: Generate matrix
id: set-matrix
run: |
echo "::set-output name=eks_addon_matrix::$(echo $(cat integration-tests/generator/k8s_versions_matrix.json))"

- name: Echo test plan matrix
run: |
echo "eks_addon_matrix: ${{ steps.set-matrix.outputs.eks_addon_matrix }}"
echo "Addon name ${{ github.event.inputs.addon_name }}, addon version ${{ github.event.inputs.addon_version }} "

GPUE2ETest:
needs: [GenerateTestMatrix]
name: GPUE2ETest
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
arrays: ${{ fromJson(needs.GenerateTestMatrix.outputs.eks_addon_matrix) }}
permissions:
id-token: write
contents: read
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0

- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v2
with:
role-to-assume: ${{ env.TERRAFORM_AWS_ASSUME_ROLE }}
aws-region: us-west-2

- name: Confirm EKS Version Support
run: |
if [[
$(go list -m k8s.io/client-go | cut -d ' ' -f 2 | cut -d '.' -f 2) -lt $( echo ${{ matrix.arrays.k8sVersion }} | cut -d '.' -f 2)
|| $(go list -m k8s.io/apimachinery | cut -d ' ' -f 2 | cut -d '.' -f 2) -lt $( echo ${{ matrix.arrays.k8sVersion }} | cut -d '.' -f 2)
|| $(go list -m k8s.io/component-base | cut -d ' ' -f 2 | cut -d '.' -f 2) -lt $( echo ${{ matrix.arrays.k8sVersion }} | cut -d '.' -f 2)
|| $(go list -m k8s.io/kubectl | cut -d ' ' -f 2 | cut -d '.' -f 2) -lt $( echo ${{ matrix.arrays.k8sVersion }} | cut -d '.' -f 2)
]]; then
echo k8s.io/client-go $(go list -m k8s.io/client-go) is less than ${{ matrix.arrays.k8sVersion }}
echo or k8s.io/apimachinery $(go list -m k8s.io/apimachinery) is less than ${{ matrix.arrays.k8sVersion }}
echo or k8s.io/component-base $(go list -m k8s.io/component-base) is less than ${{ matrix.arrays.k8sVersion }}
echo or k8s.io/kubectl $(go list -m k8s.io/kubectl) is less than ${{ matrix.arrays.k8sVersion }}, fail test
echo "please run go get -u && go mod tidy"
exit 1;
fi

- name: Verify Terraform version
run: terraform --version

- name: Terraform apply
uses: nick-fields/retry@v2
with:
max_attempts: 1
timeout_minutes: 60 # EKS takes about 20 minutes to spin up a cluster and service on the cluster
retry_wait_seconds: 5
command: |
cd integration-tests/terraform/gpu

terraform init
if terraform apply -var="beta=${{ github.event.inputs.run_in_beta }}" -var="addon_name=${{ github.event.inputs.addon_name }}" -var="addon_version=${{ github.event.inputs.addon_version }}" -var="k8s_version=${{ matrix.arrays.k8sVersion }}" --auto-approve; then
terraform destroy -var="beta=${{ github.event.inputs.run_in_beta }}" -auto-approve
else
terraform destroy -var="beta=${{ github.event.inputs.run_in_beta }}" -auto-approve && exit 1
fi

- name: Terraform destroy
if: ${{ cancelled() || failure() }}
uses: nick-fields/retry@v2
with:
max_attempts: 3
timeout_minutes: 8
retry_wait_seconds: 5
command: |
cd integration-tests/terraform/gpu

terraform destroy -var="beta=${{ github.event.inputs.run_in_beta }}" --auto-approve