Skip to content

Docker vLLM: Adding docker compose and benchmarking. #2684

Docker vLLM: Adding docker compose and benchmarking.

Docker vLLM: Adding docker compose and benchmarking. #2684

# Comment-triggered test pipeline: fires whenever a comment is created on an
# issue or pull request. Filtering for the actual trigger phrase happens in
# the job-level `if:` of the first job.
name: Trigger Jenkins Tests
on:
  issue_comment:
    types:
      - created
# Scopes granted to the default GITHUB_TOKEN for every job in this workflow.
permissions:
  actions: read
  pull-requests: write
  statuses: write
jobs:
# Gatekeeper job: verifies that the commenter is listed in CODEOWNERS/TESTOWNERS,
# then snapshots the PR merge ref into a temporary branch and publishes the PR
# metadata (number, branches, head SHA) as job outputs for the downstream jobs.
read_codeowners:
  name: Check Commenter
  runs-on: generic-runner
  # Only react to comments containing '/run-gaudi-tests' that were made on a pull request.
  if: ${{ contains(github.event.comment.body, '/run-gaudi-tests') && github.event.issue.pull_request }}
  outputs:
    pr_number: ${{ steps.extract_pr.outputs.pr_number }}
    pr_branch: ${{ steps.extract_pr.outputs.pr_branch }}
    pr_target_branch: ${{ steps.extract_pr.outputs.pr_target_branch }}
    pr_sha: ${{ steps.extract_pr.outputs.pr_sha }}
    # 'true' when the target branch looks like vX.Y.Z_next, empty otherwise.
    is_version_branch: ${{ steps.extract_pr.outputs.is_version_branch }}
  steps:
    - name: 'Checkout Repository'
      uses: actions/checkout@v4
      with:
        ref: habana_main
        fetch-depth: 0
        token: ${{ secrets.GH_PAT }}
    - name: Parse Comment
      env:
        # Passed through the environment instead of being interpolated into
        # the script text, so the value is never parsed as shell code.
        COMMENTER: ${{ github.event.comment.user.login }}
      run: |
        # Every non-comment line of both owner files, with '@' and '*' stripped,
        # flattened into one whitespace-separated list of tokens.
        MAINTAINERS=$(grep -Eh '^[^#]' .github/CODEOWNERS .github/TESTOWNERS | tr -d '@*' | tr '\n' ' ')
        echo "Maintainers are: ${MAINTAINERS}"
        echo "Commenter Is: ${COMMENTER}"
        # Authorize only on an exact, whole-token match. A plain substring
        # match would let any login that happens to be contained in a
        # maintainer name (or in a path pattern) through. -F: no regex,
        # -x: whole line, -i: GitHub logins are case-insensitive.
        if [ -z "${COMMENTER}" ] || ! tr -s '[:space:]' '\n' <<< "${MAINTAINERS}" | grep -Fxqi -- "${COMMENTER}"; then
          echo "❌ User $COMMENTER is not authorized to trigger tests."
          exit 1
        fi
    - name: Extract PR Number
      id: extract_pr
      env:
        GH_PAT: ${{ secrets.GH_PAT }}
        PR_NUMBER: ${{ github.event.issue.number }}
        REPOSITORY: ${{ github.repository }}
      run: |
        echo "pr_number=$PR_NUMBER" >> "$GITHUB_OUTPUT"
        # It will work only on open PR's
        random_string=$(tr -dc 'a-z0-9' </dev/urandom | head -c 10)
        pr_temp_branch="pr-${PR_NUMBER}-${random_string}"
        # Snapshot the PR merge ref into a uniquely named branch on origin so
        # that later jobs can check it out by name.
        git fetch origin "pull/${PR_NUMBER}/merge"
        git checkout -b "$pr_temp_branch" FETCH_HEAD
        git push origin "$pr_temp_branch"
        echo "pr_branch=$pr_temp_branch" >> "$GITHUB_OUTPUT"
        echo "Parsing The Base Branch"
        # Query the PR once and reuse the payload; -f makes an HTTP error fail
        # the step instead of silently producing "null" outputs.
        pr_json=$(curl -sSf -H "Authorization: token ${GH_PAT}" "https://api.github.com/repos/${REPOSITORY}/pulls/${PR_NUMBER}")
        target_branch=$(jq -r '.base.ref' <<< "$pr_json")
        echo "pr_target_branch=$target_branch" >> "$GITHUB_OUTPUT"
        pr_sha=$(jq -r '.head.sha' <<< "$pr_json")
        echo "pr_sha=$pr_sha" >> "$GITHUB_OUTPUT"
        version_regex='^v([0-9]+)\.([0-9]+)\.([0-9]+)_next$'
        is_version_branch=$([[ $target_branch =~ $version_regex ]] && echo true || echo "")
        echo "is_version_branch=$is_version_branch" >> "$GITHUB_OUTPUT"
# Runs the dependency review action between the PR's target branch and the
# temporary PR branch published by read_codeowners.
DependencyReview:
  name: Dependency Review
  needs: read_codeowners
  runs-on: ubuntu-latest
  steps:
    - name: Checkout Repository
      uses: actions/checkout@v4
      with:
        token: ${{ secrets.GH_PAT }}
        ref: ${{ needs.read_codeowners.outputs.pr_branch }}
    - name: Dependency Review
      uses: actions/dependency-review-action@v4
      with:
        # Both refs are given explicitly, taken from the read_codeowners outputs.
        base-ref: ${{ needs.read_codeowners.outputs.pr_target_branch }}
        head-ref: ${{ needs.read_codeowners.outputs.pr_branch }}
        fail-on-severity: high
# Runs a CodeQL analysis for Python on the temporary PR branch.
CodeQLScan:
  name: CodeQL Scan
  needs: read_codeowners
  runs-on: ubuntu-latest
  steps:
    - name: Checkout Repository
      uses: actions/checkout@v4
      with:
        token: ${{ secrets.GH_PAT }}
        ref: ${{ needs.read_codeowners.outputs.pr_branch }}
    - name: Initialize CodeQL
      uses: github/codeql-action/init@v3
      with:
        build-mode: none
        languages: python
    - name: Perform CodeQL Analysis
      uses: github/codeql-action/analyze@v3
      with:
        # Results are analyzed but never uploaded.
        upload: 'never'
        category: '/language:python'
# Builds the test matrix: flattens every step of every stage found in
# .jenkins/test_config.yaml (as present on the PR branch) into one JSON array
# and exposes it as the `tests_list` output.
CalculateJobs:
  name: Calculate Tests To Trigger
  runs-on: generic-runner
  needs:
    - DependencyReview
    - CodeQLScan
    - read_codeowners
  outputs:
    tests_list: ${{ steps.tests.outputs.tests_list }}
  steps:
    - name: Checkout Repository
      uses: actions/checkout@v4
      with:
        token: ${{ secrets.GH_PAT }}
        ref: ${{ needs.read_codeowners.outputs.pr_branch }}
    - name: Install YQ
      run: |
        # Stream the pinned release tarball straight into tar, then install the binary.
        yq_url="https://github.com/mikefarah/yq/releases/download/v4.14.1/yq_linux_amd64.tar.gz"
        wget "${yq_url}" -O - | tar xz
        sudo mv yq_linux_amd64 /usr/bin/yq
    - name: Calculate Tests
      id: tests
      run: |
        # YAML -> JSON via yq, then jq collects all stage steps into one compact array.
        config_json=$(yq -oj e .jenkins/test_config.yaml)
        test_list=$(echo "${config_json}" | jq -c "[.stages[].steps[]]")
        echo "tests_list=${test_list}" >> "$GITHUB_OUTPUT"
# One job per entry of the matrix computed by CalculateJobs. The job name is
# also what the 'Get Job ID' step searches for, so it must stay 'Test / <name>'.
TestRun:
  name: Test / ${{ matrix.tests.name }}
  runs-on: generic-runner
  needs:
    - CalculateJobs
    - read_codeowners
  strategy:
    # Let the remaining matrix entries keep running when one of them fails.
    fail-fast: false
    matrix:
      tests: ${{ fromJson(needs.CalculateJobs.outputs.tests_list) }}
  env:
    USERNAME: ${{ secrets.SWUSERNAME }}
    PASSWORD: ${{ secrets.SWPASSWORD }}
    POD_TEMPLATE: ${{ secrets.POD_TEMPLATE }}
    TEST_COMMAND: ${{ matrix.tests.command }}
  steps:
# Looks up the numeric ID of this matrix job by its display name
# ('Test / <matrix test name>') among the jobs of the current workflow run,
# and exposes it as the `job_id` step output.
- name: Get Job ID
  id: fetch_job_id
  uses: actions/github-script@v7
  env:
    MATRIX_TEST_NAME: ${{ matrix.tests.name }}
  with:
    script: |
      const PAGE_SIZE = 100; // Max allowed
      const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

      // Walks the job pages of the run once; resolves to the matching job's
      // id, or null when an empty page is reached without a match.
      async function scanRunForJob(runId, wantedName) {
        for (let page = 1; ; page += 1) {
          console.log(`Fetching page ${page}...`);
          const { data } = await github.rest.actions.listJobsForWorkflowRun({
            owner: context.repo.owner,
            repo: context.repo.repo,
            run_id: runId,
            per_page: PAGE_SIZE,
            page,
          });
          const jobs = data.jobs;
          if (jobs.length === 0) {
            console.log(`No more jobs found, stopping at page ${page}.`);
            return null;
          }
          console.log(`Fetched ${jobs.length} jobs from page ${page}`);
          const match = jobs.find((candidate) => candidate.name.trim() === wantedName);
          if (match) {
            console.log(`Found Job ID: ${match.id}`);
            return match.id;
          }
        }
      }

      // Repeats the scan up to `retries` times, waiting `delay` ms after each
      // unsuccessful scan, and throws when every attempt comes back empty.
      async function getJobIdWithPagination(retries = 3, delay = 5000) {
        const runId = context.runId;
        const wantedName = `Test / ${process.env.MATRIX_TEST_NAME}`;
        console.log(`Searching for job: ${wantedName} in run: ${runId}`);
        let attemptsLeft = retries;
        while (attemptsLeft > 0) {
          attemptsLeft -= 1;
          const foundId = await scanRunForJob(runId, wantedName);
          if (foundId) {
            return foundId;
          }
          console.log(`Job not found, retrying in ${delay / 1000} seconds...`);
          await pause(delay);
        }
        throw new Error(`Job ID not found after ${retries} attempts`);
      }

      core.setOutput("job_id", await getJobIdWithPagination());
# Assembles the web URL of this job's log page and exposes it as the `url` output.
- name: Get Job URL
  id: job_url
  run: |
    run_url="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
    url="${run_url}/job/${{ steps.fetch_job_id.outputs.job_id }}"
    echo "url=${url}" >> "$GITHUB_OUTPUT"
# Marks the PR head commit as 'pending' for this test, linking to the job page.
- name: Create Commit Status(Pending)
  uses: actions/github-script@v7
  env:
    JOB_NAME: ${{ matrix.tests.name }}
    GIT_SHA: ${{ needs.read_codeowners.outputs.pr_sha }}
    TARGET_URL: ${{ steps.job_url.outputs.url }}
  with:
    script: |
      const { GIT_SHA, JOB_NAME, TARGET_URL } = process.env;
      const { owner, repo } = context.repo;
      const status = {
        owner,
        repo,
        sha: GIT_SHA,
        state: 'pending',
        description: `Running ${JOB_NAME}`,
        target_url: TARGET_URL,
        context: `Test / ${JOB_NAME}`,
      };
      await github.rest.repos.createCommitStatus(status);
# Fetches the script at the secret HLCTL_ADDRESS and pipes it into bash.
# All output of the script is discarded.
- name: Download Hlctl
  run: |
    curl -sS ${{ secrets.HLCTL_ADDRESS }} | bash > /dev/null 2>&1
# Runs the command stored in the HLCTL_COMMAND secret, discarding all output.
- name: Config Hlctl
  run: |
    ${{ secrets.HLCTL_COMMAND }} > /dev/null 2>&1
# Renders pod.yml from the POD_TEMPLATE secret by substituting the ##...##
# placeholders (SynapseAI version/build, build tag, PyTorch version, git
# branch, test command, log redirection).
- name: Create Pod Template
  env:
    TARGET_BRANCH: ${{ needs.read_codeowners.outputs.pr_target_branch }}
    RELEASED_SYNAPSE_VERSION: ${{ vars.RELEASED_SYNAPSE_VERSION }}
    BASE_BRANCH: ${{ needs.read_codeowners.outputs.pr_branch }}
    IS_VERSION_BRANCH: ${{ needs.read_codeowners.outputs.is_version_branch }}
  run: |
    # Default (version branch): the placeholder is replaced by '&>'.
    # The '&' is escaped because sed treats a bare '&' in the replacement as
    # "the matched text".
    LOG_REDIRECTION="\&>"
    LOG_REGEX=""
    if [ -n "${IS_VERSION_BRANCH}" ]; then
      # vX.Y.Z_next -> X.Y.Z
      synapse_version=${TARGET_BRANCH#v}
      synapse_version=${synapse_version%_*}
      synapse_build_endpoint="https://dms.habana-labs.com/api/v1.1/guide/info/${synapse_version}/latest?type=docker-pt"
    else
      synapse_version=${RELEASED_SYNAPSE_VERSION#v}
      # Non-version branch: the placeholder AND the rest of its line
      # (LOG_REGEX) are replaced by a single quote.
      LOG_REDIRECTION="'"
      LOG_REGEX=".*$"
      synapse_build_endpoint="https://dms.habana-labs.com/api/v1.1/branch/info/v${synapse_version}"
    fi
    echo "Using SynapseAI version ${synapse_version}"
    synapse_build=$(curl "${synapse_build_endpoint}" | jq -r ".release_id")
    # Without this guard a failed lookup would be written into the template
    # as an empty string or the literal "null".
    if [ -z "${synapse_build}" ] || [ "${synapse_build}" = "null" ]; then
      echo "Could not resolve a SynapseAI build id from ${synapse_build_endpoint}"
      exit 1
    fi
    pt_version=${{ vars.PT_VERSION }}
    # This workflow runs on issue_comment events, whose payload carries the
    # PR number in event.issue.number (event.number is not set there).
    BUILD_TAG="Github-vLLM-Fork-${{ github.event.issue.number }}-${{github.run_number}}"
    # Escape '&' so sed does not expand it inside the replacement text.
    safe_cmd=${TEST_COMMAND//&/\\&}
    echo "Writing Pod Template To File"
    echo "${POD_TEMPLATE}" > pod.yml
    sed -i "s/##VERSION##/${synapse_version}/g" pod.yml
    sed -i "s/##BUILD##/${synapse_build}/g" pod.yml
    sed -i "s/##BUILD_TAG##/${BUILD_TAG}/g" pod.yml
    sed -i "s/##PYTORCH_VERSION##/${pt_version}/g" pod.yml
    # '|' is the delimiter here because branch names and commands may contain '/'.
    sed -i "s|##GIT_BRANCH##|$BASE_BRANCH|g" pod.yml
    sed -i "s|##CMD##|$safe_cmd|g" pod.yml
    sed -i "s|##LOG_REDIRECTION##$LOG_REGEX|$LOG_REDIRECTION|g" pod.yml
    echo "Pod Template Created"
# Launches the test container from pod.yml via hlctl and propagates its exit
# status as the result of this step.
- name: Run Test
  env:
    IS_VERSION_BRANCH: ${{ needs.read_codeowners.outputs.is_version_branch }}
    # The matrix is built from .jenkins/test_config.yaml on the PR branch, so
    # its values are PR-controlled. Hand the flavor over through the
    # environment instead of splicing it into the script text, where it could
    # be parsed as shell code.
    TEST_FLAVOR: ${{ matrix.tests.flavor }}
  run: |
    random_string=$(tr -dc 'a-z0-9' </dev/urandom | head -c 10)
    pod_name="vllm-fork-${{github.event.issue.number}}-${random_string}"
    # Disable errexit around hlctl so the log location can still be printed
    # when the test fails; the captured status is re-raised at the end.
    set +e
    hlctl create containers \
      --file=pod.yml \
      --flavor="${TEST_FLAVOR}" \
      --name="${pod_name}" \
      --namespace="framework" \
      --retry \
      --shm=10240
    test_status=$?
    set -e
    if [ -n "${IS_VERSION_BRANCH}" ]; then
      echo "Logs are available at https://logs-browser.k8s-infra.habana-labs.com/files/${pod_name}-tfjob"
    fi
    exit $test_status
# Runs only when an earlier step of this job failed: marks the PR head commit
# as 'failure' for this test.
- name: Create Commit Status(Failure)
  if: failure()
  uses: actions/github-script@v7
  env:
    JOB_NAME: ${{ matrix.tests.name }}
    GIT_SHA: ${{ needs.read_codeowners.outputs.pr_sha }}
    TARGET_URL: ${{ steps.job_url.outputs.url }}
  with:
    script: |
      const { GIT_SHA, JOB_NAME, TARGET_URL } = process.env;
      const { owner, repo } = context.repo;
      const status = {
        owner,
        repo,
        sha: GIT_SHA,
        state: 'failure',
        description: `${JOB_NAME} Test Failed!`,
        target_url: TARGET_URL,
        context: `Test / ${JOB_NAME}`,
      };
      await github.rest.repos.createCommitStatus(status);
# Runs only when every earlier step of this job succeeded: marks the PR head
# commit as 'success' for this test.
- name: Create Commit Status(Success)
  if: success()
  uses: actions/github-script@v7
  env:
    JOB_NAME: ${{ matrix.tests.name }}
    GIT_SHA: ${{ needs.read_codeowners.outputs.pr_sha }}
    TARGET_URL: ${{ steps.job_url.outputs.url }}
  with:
    script: |
      const { GIT_SHA, JOB_NAME, TARGET_URL } = process.env;
      const { owner, repo } = context.repo;
      const status = {
        owner,
        repo,
        sha: GIT_SHA,
        state: 'success',
        description: `${JOB_NAME} Test Has Finished Successfully`,
        target_url: TARGET_URL,
        context: `Test / ${JOB_NAME}`,
      };
      await github.rest.repos.createCommitStatus(status);
# Final job: collapses the aggregate TestRun result into a single
# 'Summarize Test Results' commit status on the PR head commit.
Summarize:
  name: Summarize Test Results
  runs-on: generic-runner
  needs:
    - TestRun
    - read_codeowners
  # Run even when tests failed, but not when the pipeline never got as far
  # as running them.
  if: always() && needs.TestRun.result != 'skipped' && needs.read_codeowners.result != 'skipped'
  steps:
    - name: Checkout Repository
      uses: actions/checkout@v4
      with:
        token: ${{ secrets.GH_PAT }}
        fetch-depth: 0
    # Currently disabled: removal of the temporary PR branch.
    # - name: Delete Temp Branch
    #   run: |
    #     git push origin --delete ${{ needs.read_codeowners.outputs.pr_branch }}
    - name: Check Test Results
      run: |
        test_result="${{ needs.TestRun.result }}"
        echo "Test Finished with status ${test_result}"
        # Anything other than a clean success fails this step, which selects
        # the failure() status step below.
        [[ "${test_result}" == "success" ]] || exit 1
    - name: Create Commit Status(Success)
      if: success()
      uses: actions/github-script@v7
      env:
        GIT_SHA: ${{ needs.read_codeowners.outputs.pr_sha }}
      with:
        script: |
          const { owner, repo } = context.repo;
          const status = {
            owner,
            repo,
            sha: process.env.GIT_SHA,
            state: 'success',
            description: 'All Tests Passed Successfully!',
            context: 'Summarize Test Results',
          };
          await github.rest.repos.createCommitStatus(status);
    - name: Create Commit Status(Failure)
      if: failure()
      uses: actions/github-script@v7
      env:
        GIT_SHA: ${{ needs.read_codeowners.outputs.pr_sha }}
      with:
        script: |
          const { owner, repo } = context.repo;
          const status = {
            owner,
            repo,
            sha: process.env.GIT_SHA,
            state: 'failure',
            description: 'Test Failure! Check Jobs To See Why',
            context: 'Summarize Test Results',
          };
          await github.rest.repos.createCommitStatus(status);