DP: Fix init_device for DP #2671
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Trigger Jenkins Tests | |
on: | |
issue_comment: | |
types: [created] | |
permissions: | |
pull-requests: write | |
statuses: write | |
actions: read | |
jobs: | |
read_codeowners: | |
name: Check Commenter | |
runs-on: generic-runner | |
if: ${{ contains(github.event.comment.body, '/run-gaudi-tests') && github.event.issue.pull_request }} | |
outputs: | |
pr_number: ${{ steps.extract_pr.outputs.pr_number }} | |
pr_branch: ${{ steps.extract_pr.outputs.pr_branch }} | |
pr_target_branch: ${{ steps.extract_pr.outputs.pr_target_branch }} | |
pr_sha: ${{ steps.extract_pr.outputs.pr_sha }} | |
is_version_branch: ${{ steps.extract_pr.outputs.is_version_branch }} | |
steps: | |
- name: 'Checkout Repository' | |
uses: actions/checkout@v4 | |
with: | |
ref: habana_main | |
fetch-depth: 0 | |
token: ${{ secrets.GH_PAT }} | |
- name: Parse Comment | |
run: | | |
MAINTAINERS=$(grep -Eh '^[^#]' .github/CODEOWNERS .github/TESTOWNERS | tr -d '@*' | tr '\n' ' ') | |
COMMENTER=${{ github.event.comment.user.login }} | |
echo "Maintainers are: ${MAINTAINERS}" | |
echo "Commenter Is: ${COMMENTER}" | |
if ! echo "$MAINTAINERS" | grep -q "$COMMENTER"; then | |
echo "❌ User $COMMENTER is not authorized to trigger tests." | |
exit 1 | |
fi | |
- name: Extract PR Number | |
id: extract_pr | |
run: | | |
PR_NUMBER="${{ github.event.issue.number }}" | |
echo "pr_number=$PR_NUMBER" >> "$GITHUB_OUTPUT" | |
# It will work only on open PR's | |
random_string=$(tr -dc 'a-z0-9' </dev/urandom | head -c 10) | |
pr_temp_branch=$(echo "pr-${PR_NUMBER}-${random_string}") | |
git fetch origin pull/${PR_NUMBER}/merge | |
git checkout -b $pr_temp_branch FETCH_HEAD | |
git push origin $pr_temp_branch | |
echo "pr_branch=$pr_temp_branch" >> "$GITHUB_OUTPUT" | |
echo "Parsing The Base Branch" | |
target_branch=$(curl -sH "Authorization: token ${{ secrets.GH_PAT }}" https://api.github.com/repos/${{github.repository}}/pulls/${PR_NUMBER} | jq -r '.base.ref') | |
echo "pr_target_branch=$target_branch" >> "$GITHUB_OUTPUT" | |
pr_sha=$(curl -sH "Authorization: token ${{ secrets.GH_PAT }}" https://api.github.com/repos/${{github.repository}}/pulls/${PR_NUMBER} | jq -r '.head.sha') | |
echo "pr_sha=$pr_sha" >> $GITHUB_OUTPUT | |
version_regex='^v([0-9]+)\.([0-9]+)\.([0-9]+)_next$' | |
is_version_branch=$([[ $target_branch =~ $version_regex ]] && echo true || echo "") | |
echo "is_version_branch=$is_version_branch" >> "$GITHUB_OUTPUT" | |
DependencyReview: | |
name: Dependency Review | |
runs-on: ubuntu-latest | |
needs: [read_codeowners] | |
steps: | |
- name: Checkout Repository | |
uses: actions/checkout@v4 | |
with: | |
ref: ${{ needs.read_codeowners.outputs.pr_branch }} | |
token: ${{ secrets.GH_PAT }} | |
- name: 'Dependency Review' | |
uses: actions/dependency-review-action@v4 | |
with: | |
fail-on-severity: high | |
base-ref: ${{ needs.read_codeowners.outputs.pr_target_branch }} | |
head-ref: ${{ needs.read_codeowners.outputs.pr_branch }} | |
CodeQLScan: | |
name: CodeQL Scan | |
runs-on: ubuntu-latest | |
needs: [read_codeowners] | |
steps: | |
- name: Checkout Repository | |
uses: actions/checkout@v4 | |
with: | |
ref: ${{ needs.read_codeowners.outputs.pr_branch }} | |
token: ${{ secrets.GH_PAT }} | |
- name: Initialize CodeQL | |
uses: github/codeql-action/init@v3 | |
with: | |
languages: python | |
build-mode: none | |
- name: Perform CodeQL Analysis | |
uses: github/codeql-action/analyze@v3 | |
with: | |
category: "/language:python" | |
upload: "never" | |
CalculateJobs: | |
runs-on: generic-runner | |
name: Calculate Tests To Trigger | |
needs: [DependencyReview,CodeQLScan,read_codeowners] | |
outputs: | |
tests_list: ${{ steps.tests.outputs.tests_list }} | |
steps: | |
- name: Checkout Repository | |
uses: actions/checkout@v4 | |
with: | |
ref: ${{ needs.read_codeowners.outputs.pr_branch }} | |
token: ${{ secrets.GH_PAT }} | |
- name: Install YQ | |
run: | | |
wget https://github.com/mikefarah/yq/releases/download/v4.14.1/yq_linux_amd64.tar.gz -O - |\ | |
tar xz && sudo mv yq_linux_amd64 /usr/bin/yq | |
- name: Calculate Tests | |
id: tests | |
run: | | |
test_list=$(yq -oj e .jenkins/test_config.yaml | jq -c "[.stages[].steps[]]") | |
echo "tests_list=${test_list}" >> "$GITHUB_OUTPUT" | |
TestRun: | |
name: Test / ${{matrix.tests.name}} | |
needs: [CalculateJobs,read_codeowners] | |
runs-on: generic-runner | |
strategy: | |
fail-fast: false | |
matrix: | |
tests: ${{ fromJson(needs.CalculateJobs.outputs.tests_list) }} | |
env: | |
USERNAME: ${{ secrets.SWUSERNAME }} | |
PASSWORD: ${{ secrets.SWPASSWORD }} | |
POD_TEMPLATE: ${{ secrets.POD_TEMPLATE }} | |
TEST_COMMAND: ${{ matrix.tests.command }} | |
steps: | |
- name: Get Job ID | |
uses: actions/github-script@v7 | |
id: fetch_job_id | |
with: | |
script: | | |
async function getJobIdWithPagination(retries = 3, delay = 5000) { | |
const run_id = context.runId; | |
const matrix_test_name = process.env.MATRIX_TEST_NAME; | |
console.log(`Searching for job: Test / ${matrix_test_name} in run: ${run_id}`); | |
for (let attempt = 0; attempt < retries; attempt++) { | |
let page = 1; | |
let per_page = 100; // Max allowed | |
let job_id = null; | |
while (true) { | |
console.log(`Fetching page ${page}...`); | |
const response = await github.rest.actions.listJobsForWorkflowRun({ | |
owner: context.repo.owner, | |
repo: context.repo.repo, | |
run_id: run_id, | |
per_page: per_page, | |
page: page | |
}); | |
if (response.data.jobs.length === 0) { | |
console.log(`No more jobs found, stopping at page ${page}.`); | |
break; | |
} | |
console.log(`Fetched ${response.data.jobs.length} jobs from page ${page}`); | |
// Find the correct job | |
const job = response.data.jobs.find(j => | |
j.name.trim() === `Test / ${matrix_test_name}` | |
); | |
if (job) { | |
console.log(`Found Job ID: ${job.id}`); | |
job_id = job.id; | |
break; | |
} | |
page++; // Move to the next page | |
} | |
if (job_id) return job_id; | |
console.log(`Job not found, retrying in ${delay / 1000} seconds...`); | |
await new Promise(res => setTimeout(res, delay)); | |
} | |
throw new Error(`Job ID not found after ${retries} attempts`); | |
} | |
const job_id = await getJobIdWithPagination(); | |
core.setOutput("job_id", job_id); | |
env: | |
MATRIX_TEST_NAME: ${{ matrix.tests.name }} | |
- name: Get Job URL | |
id: job_url | |
run: | | |
url=$(echo "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}/job/${{ steps.fetch_job_id.outputs.job_id }}") | |
echo "url=$url" >> $GITHUB_OUTPUT | |
- name: Create Commit Status(Pending) | |
uses: actions/github-script@v7 | |
env: | |
GIT_SHA: ${{ needs.read_codeowners.outputs.pr_sha }} | |
TARGET_URL: ${{ steps.job_url.outputs.url }} | |
JOB_NAME: ${{matrix.tests.name}} | |
with: | |
script: | | |
await github.rest.repos.createCommitStatus({ | |
owner: context.repo.owner, | |
repo: context.repo.repo, | |
sha: process.env.GIT_SHA, | |
state: 'pending', | |
description: `Running ${process.env.JOB_NAME}`, | |
target_url: process.env.TARGET_URL, | |
context: `Test / ${process.env.JOB_NAME}` | |
}); | |
- name: Download Hlctl | |
run: | | |
curl --show-error --silent ${{ secrets.HLCTL_ADDRESS }} | bash &> /dev/null | |
- name: Config Hlctl | |
run: | | |
${{ secrets.HLCTL_COMMAND }} &> /dev/null | |
- name: Create Pod Template | |
env: | |
TARGET_BRANCH: ${{ needs.read_codeowners.outputs.pr_target_branch }} | |
RELEASED_SYNAPSE_VERSION: ${{ vars.RELEASED_SYNAPSE_VERSION }} | |
BASE_BRANCH: ${{ needs.read_codeowners.outputs.pr_branch }} | |
IS_VERSION_BRANCH: ${{ needs.read_codeowners.outputs.is_version_branch }} | |
run: | | |
LOG_REDIRECTION="\&>" | |
LOG_REGEX="" | |
if [ -n "${IS_VERSION_BRANCH}" ]; then | |
synapse_version=${TARGET_BRANCH#v} | |
synapse_version=${synapse_version%_*} | |
synapse_build_endpoint="https://dms.habana-labs.com/api/v1.1/guide/info/${synapse_version}/latest?type=docker-pt" | |
else | |
synapse_version=${RELEASED_SYNAPSE_VERSION#v} | |
LOG_REDIRECTION="'" | |
LOG_REGEX=".*$" | |
synapse_build_endpoint="https://dms.habana-labs.com/api/v1.1/branch/info/v${synapse_version}" | |
fi | |
echo "Using SynapseAI version ${synapse_version}" | |
synapse_build=$(curl "${synapse_build_endpoint}" | jq -r ".release_id") | |
pt_version=${{ vars.PT_VERSION }} | |
BUILD_TAG="Github-vLLM-Fork-${{ github.event.number }}-${{github.run_number}}" | |
safe_cmd=${TEST_COMMAND//&/\\&} | |
echo "Writing Pod Template To File" | |
echo "${POD_TEMPLATE}" > pod.yml | |
sed -i "s/##VERSION##/${synapse_version}/g" pod.yml | |
sed -i "s/##BUILD##/${synapse_build}/g" pod.yml | |
sed -i "s/##BUILD_TAG##/${BUILD_TAG}/g" pod.yml | |
sed -i "s/##PYTORCH_VERSION##/${pt_version}/g" pod.yml | |
sed -i "s|##GIT_BRANCH##|$BASE_BRANCH|g" pod.yml | |
sed -i "s|##CMD##|$safe_cmd|g" pod.yml | |
sed -i "s|##LOG_REDIRECTION##$LOG_REGEX|$LOG_REDIRECTION|g" pod.yml | |
echo "Pod Template Created" | |
- name: Run Test | |
env: | |
IS_VERSION_BRANCH: ${{ needs.read_codeowners.outputs.is_version_branch }} | |
run: | | |
random_string=$(tr -dc 'a-z0-9' </dev/urandom | head -c 10) | |
pod_name="vllm-fork-${{github.event.issue.number}}-${random_string}" | |
set +e | |
hlctl create containers \ | |
--file=pod.yml \ | |
--flavor=${{ matrix.tests.flavor}} \ | |
--name="${pod_name}" \ | |
--namespace="framework" \ | |
--retry \ | |
--shm=10240 | |
test_status=$? | |
set -e | |
if [ -n "${IS_VERSION_BRANCH}" ]; then | |
echo "Logs are available at https://logs-browser.k8s-infra.habana-labs.com/files/${pod_name}-tfjob" | |
fi | |
exit $test_status | |
- name: Create Commit Status(Failure) | |
uses: actions/github-script@v7 | |
if: failure() | |
env: | |
GIT_SHA: ${{ needs.read_codeowners.outputs.pr_sha }} | |
TARGET_URL: ${{ steps.job_url.outputs.url }} | |
JOB_NAME: ${{matrix.tests.name}} | |
with: | |
script: | | |
await github.rest.repos.createCommitStatus({ | |
owner: context.repo.owner, | |
repo: context.repo.repo, | |
sha: process.env.GIT_SHA, | |
state: 'failure', | |
description: `${process.env.JOB_NAME} Test Failed!`, | |
target_url: process.env.TARGET_URL, | |
context: `Test / ${process.env.JOB_NAME}` | |
}); | |
- name: Create Commit Status(Success) | |
uses: actions/github-script@v7 | |
if: success() | |
env: | |
GIT_SHA: ${{ needs.read_codeowners.outputs.pr_sha }} | |
TARGET_URL: ${{ steps.job_url.outputs.url }} | |
JOB_NAME: ${{matrix.tests.name}} | |
with: | |
script: | | |
await github.rest.repos.createCommitStatus({ | |
owner: context.repo.owner, | |
repo: context.repo.repo, | |
sha: process.env.GIT_SHA, | |
state: 'success', | |
description: `${process.env.JOB_NAME} Test Has Finished Successfully`, | |
target_url: process.env.TARGET_URL, | |
context: `Test / ${process.env.JOB_NAME}` | |
}); | |
Summarize: | |
name: Summarize Test Results | |
runs-on: generic-runner | |
needs: [TestRun,read_codeowners] | |
if: always() && needs.TestRun.result != 'skipped' && needs.read_codeowners.result != 'skipped' | |
steps: | |
- name: Checkout Repository | |
uses: actions/checkout@v4 | |
with: | |
fetch-depth: 0 | |
token: ${{ secrets.GH_PAT }} | |
# - name: Delete Temp Branch | |
# run: | | |
# git push origin --delete ${{ needs.read_codeowners.outputs.pr_branch }} | |
- name: Check Test Results | |
run: | | |
test_result="${{ needs.TestRun.result }}" | |
echo "Test Finished with status ${test_result}" | |
if [[ "${test_result}" != "success" ]]; then | |
exit 1 | |
fi | |
- name: Create Commit Status(Success) | |
uses: actions/github-script@v7 | |
if: success() | |
env: | |
GIT_SHA: ${{ needs.read_codeowners.outputs.pr_sha }} | |
with: | |
script: | | |
await github.rest.repos.createCommitStatus({ | |
owner: context.repo.owner, | |
repo: context.repo.repo, | |
sha: process.env.GIT_SHA, | |
state: 'success', | |
description: 'All Tests Passed Successfully!', | |
context: 'Summarize Test Results' | |
}); | |
- name: Create Commit Status(Failure) | |
uses: actions/github-script@v7 | |
if: failure() | |
env: | |
GIT_SHA: ${{ needs.read_codeowners.outputs.pr_sha }} | |
with: | |
script: | | |
await github.rest.repos.createCommitStatus({ | |
owner: context.repo.owner, | |
repo: context.repo.repo, | |
sha: process.env.GIT_SHA, | |
state: 'failure', | |
description: 'Test Failure! Check Jobs To See Why', | |
context: 'Summarize Test Results' | |
}); | |