fix: fix the L0_infer tests for expected num tests #8221

Closed · wants to merge 4 commits
32 changes: 18 additions & 14 deletions build.py
@@ -622,7 +622,7 @@ def backend_cmake_args(images, components, be, install_dir, library_paths):
 
     cargs += cmake_backend_extra_args(be)
     if be == "tensorrtllm":
-        cargs.append("-S ../inflight_batcher_llm -B .")
+        cargs.append("-S ../triton_backend/inflight_batcher_llm -B .")
 
     else:
        cargs.append("..")
@@ -1481,12 +1481,12 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
     if "vllm" in backends:
         df += f"""
 # Install required packages for vLLM models
 ARG BUILD_PUBLIC_VLLM="true"
-ARG VLLM_INDEX_URL
-ARG PYTORCH_TRITON_URL
-ARG NVPL_SLIM_URL
 
 RUN --mount=type=secret,id=req,target=/run/secrets/requirements \\
+    --mount=type=secret,id=VLLM_INDEX_URL,env=VLLM_INDEX_URL \\
+    --mount=type=secret,id=PYTORCH_TRITON_URL,env=PYTORCH_TRITON_URL \\
+    --mount=type=secret,id=NVPL_SLIM_URL,env=NVPL_SLIM_URL \\
     if [ "$BUILD_PUBLIC_VLLM" = "false" ]; then \\
         if [ "$(uname -m)" = "x86_64" ]; then \\
             pip3 install --no-cache-dir \\
@@ -1900,10 +1900,10 @@ def create_docker_build_script(script_name, container_install_dir, container_ci_
     if secrets:
         finalargs += [
             f"--secret id=req,src={requirements}",
-            f"--build-arg VLLM_INDEX_URL={vllm_index_url}",
-            f"--build-arg PYTORCH_TRITON_URL={pytorch_triton_url}",
+            f"--secret id=VLLM_INDEX_URL",
+            f"--secret id=PYTORCH_TRITON_URL",
+            f"--secret id=NVPL_SLIM_URL",
             f"--build-arg BUILD_PUBLIC_VLLM={build_public_vllm}",
-            f"--build-arg NVPL_SLIM_URL={nvpl_slim_url}",
         ]
     finalargs += [
         "-t",
@@ -2081,7 +2081,16 @@ def backend_build(
     cmake_script.comment()
     cmake_script.mkdir(build_dir)
     cmake_script.cwd(build_dir)
-    cmake_script.gitclone(backend_repo(be), tag, be, github_organization)
+    if be == "tensorrtllm":
+        github_organization = (
+            "https://github.com/NVIDIA"
+            if "triton-inference-server" in FLAGS.github_organization
+            else FLAGS.github_organization
+        )
+        repository_name = "TensorRT-LLM"
+        cmake_script.gitclone(repository_name, tag, be, github_organization)
+    else:
+        cmake_script.gitclone(backend_repo(be), tag, be, github_organization)
 
     if be == "tensorrtllm":
         tensorrtllm_prebuild(cmake_script)
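Aside: the new clone logic above amounts to the following condensed restatement. This is a hypothetical sketch for clarity, not code from the PR; `resolve_tensorrtllm_origin` is an invented helper name.

```python
def resolve_tensorrtllm_origin(github_organization: str) -> tuple[str, str]:
    """Pick where tensorrtllm backend sources are cloned from.

    With the default triton-inference-server organization, the backend now
    clones the upstream NVIDIA/TensorRT-LLM repository; an explicit
    --github-organization override is respected as-is.
    """
    if "triton-inference-server" in github_organization:
        return "https://github.com/NVIDIA", "TensorRT-LLM"
    return github_organization, "TensorRT-LLM"
```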
@@ -2769,8 +2778,6 @@ def enable_all():
         metavar=("key", "value"),
         help="Add build secrets in the form of <key> <value>. These secrets are used during the build process for vllm. The secrets are passed to the Docker build step as `--secret id=<key>`. The following keys are expected and their purposes are described below:\n\n"
         " - 'req': A file containing a list of dependencies for pip (e.g., requirements.txt).\n"
-        " - 'vllm_index_url': The index URL for the pip install.\n"
-        " - 'pytorch_triton_url': The location of the PyTorch wheel to download.\n"
         " - 'build_public_vllm': A flag (default is 'true') indicating whether to build the public VLLM version.\n\n"
         "Ensure that the required environment variables for these secrets are set before running the build.",
     )
@@ -2892,9 +2899,6 @@ def enable_all():
     secrets = dict(getattr(FLAGS, "build_secret", []))
     if secrets:
         requirements = secrets.get("req", "")
-        vllm_index_url = secrets.get("vllm_index_url", "")
-        pytorch_triton_url = secrets.get("pytorch_triton_url", "")
-        nvpl_slim_url = secrets.get("nvpl_slim_url", "")
         build_public_vllm = secrets.get("build_public_vllm", "true")
         log('Build Arg for BUILD_PUBLIC_VLLM: "{}"'.format(build_public_vllm))
 
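Taken together, the build.py changes swap plaintext `--build-arg` values for BuildKit secrets. A minimal sketch of the resulting `docker build` invocation, assuming the VLLM_INDEX_URL, PYTORCH_TRITON_URL, and NVPL_SLIM_URL environment variables are exported by the caller (the helper name below is hypothetical):

```python
import subprocess

def docker_build_with_secrets(image_tag, requirements_file, build_public_vllm="true"):
    """Hedged sketch of the secret-based build command assembled by
    create_docker_build_script(); not the PR's exact code."""
    cmd = [
        "docker", "build",
        # BuildKit resolves a bare id=<NAME> secret from the environment
        # variable of the same name, so the URLs stay out of image history,
        # unlike --build-arg values, which persist in `docker history`.
        "--secret", f"id=req,src={requirements_file}",
        "--secret", "id=VLLM_INDEX_URL",
        "--secret", "id=PYTORCH_TRITON_URL",
        "--secret", "id=NVPL_SLIM_URL",
        "--build-arg", f"BUILD_PUBLIC_VLLM={build_public_vllm}",
        "-t", image_tag,
        ".",
    ]
    subprocess.run(cmd, check=True)
```

The Dockerfile side consumes each secret with `--mount=type=secret,id=<NAME>,env=<NAME>`, so the value is visible only for the duration of that single RUN instruction.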
8 changes: 6 additions & 2 deletions qa/L0_backend_python/common.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -85,7 +85,11 @@ create_conda_env_with_specified_path() {
 create_python_backend_stub() {
   rm -rf python_backend
   git clone ${TRITON_REPO_ORGANIZATION}/python_backend -b $PYTHON_BACKEND_REPO_TAG
+  CUDA_PATH=$(readlink -f /usr/local/cuda)
   (cd python_backend/ && mkdir builddir && cd builddir && \
-  cmake -DTRITON_ENABLE_GPU=ON -DTRITON_REPO_ORGANIZATION:STRING=${TRITON_REPO_ORGANIZATION} -DTRITON_BACKEND_REPO_TAG=$TRITON_BACKEND_REPO_TAG -DTRITON_COMMON_REPO_TAG=$TRITON_COMMON_REPO_TAG -DTRITON_CORE_REPO_TAG=$TRITON_CORE_REPO_TAG -DPYBIND11_PYTHON_VERSION=$PY_VERSION ../ && \
+  cmake -DTRITON_ENABLE_GPU=ON -DCMAKE_CUDA_COMPILER=$CUDA_PATH/bin/nvcc \
+      -DCUDAToolkit_ROOT=$CUDA_PATH -DTRITON_REPO_ORGANIZATION:STRING=${TRITON_REPO_ORGANIZATION} \
+      -DTRITON_BACKEND_REPO_TAG=$TRITON_BACKEND_REPO_TAG -DTRITON_COMMON_REPO_TAG=$TRITON_COMMON_REPO_TAG \
+      -DTRITON_CORE_REPO_TAG=$TRITON_CORE_REPO_TAG -DPYBIND11_PYTHON_VERSION=$PY_VERSION ../ && \
   make -j18 triton-python-backend-stub)
 }
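The new CUDA_PATH line pins the stub build to a concrete CUDA toolkit rather than whatever CMake autodetects. A rough Python equivalent of what `readlink -f /usr/local/cuda` resolves, for illustration only:

```python
import os

# /usr/local/cuda is conventionally a symlink to a versioned toolkit
# directory; resolving it gives CMake stable CMAKE_CUDA_COMPILER and
# CUDAToolkit_ROOT values (e.g. /usr/local/cuda-12.x/bin/nvcc).
cuda_path = os.path.realpath("/usr/local/cuda")
nvcc = os.path.join(cuda_path, "bin", "nvcc")
print(cuda_path, nvcc)
```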
18 changes: 6 additions & 12 deletions qa/L0_backend_python/env/test.sh
@@ -53,8 +53,8 @@ conda install numpy=1.26.4 -y
 if [ $TRITON_RHEL -eq 1 ]; then
     TORCH_VERISON="2.17.0"
 fi
-conda install torch=${TORCH_VERSION} -y
-PY312_VERSION_STRING="Python version is 3.12, NumPy version is 1.26.4, and PyTorch version is ${TORCH_VERISON}"
+conda install pytorch=${TORCH_VERSION} -y
+PY312_VERSION_STRING="Python version is 3.12, NumPy version is 1.26.4, and PyTorch version is ${TORCH_VERSION}"
 conda pack -o python3.12.tar.gz
 mkdir -p models/python_3_12/1/
 cp ../../python_models/python_version/config.pbtxt ./models/python_3_12
@@ -122,7 +122,7 @@ fi
 kill_server
 
 set +e
-grep "Locale is ('en_US', 'UTF-8')" $SERVER_LOG
+grep "Locale is ('C', 'UTF-8')" $SERVER_LOG
 if [ $? -ne 0 ]; then
     cat $SERVER_LOG
     echo -e "\n***\n*** Locale UTF-8 was not found in Triton logs. \n***"
@@ -182,10 +182,6 @@ aws s3 mb "${BUCKET_URL}"
 BUCKET_URL=${BUCKET_URL%/}
 BUCKET_URL_SLASH="${BUCKET_URL}/"
 
-# Remove Python 3.7 model because it contains absolute paths and cannot be used
-# with S3.
-rm -rf models/python_3_7
-
 # Test with the bucket url as model repository
 aws s3 cp models/ "${BUCKET_URL_SLASH}" --recursive --include "*"
 
@@ -205,10 +201,10 @@ fi
 kill_server
 
 set +e
-grep "$PY36_VERSION_STRING" $SERVER_LOG
+grep "$PY312_VERSION_STRING" $SERVER_LOG
 if [ $? -ne 0 ]; then
     cat $SERVER_LOG
-    echo -e "\n***\n*** $PY36_VERSION_STRING was not found in Triton logs. \n***"
+    echo -e "\n***\n*** $PY312_VERSION_STRING was not found in Triton logs. \n***"
     RET=1
 fi
 set -e
@@ -217,8 +213,6 @@ set -e
 aws s3 rm "${BUCKET_URL_SLASH}" --recursive --include "*"
 
 # Test with EXECUTION_ENV_PATH outside the model directory
-sed -i "s/TRITON_MODEL_DIRECTORY\/python_3_6_environment/TRITON_MODEL_DIRECTORY\/..\/python_3_6_environment/" models/python_3_6/config.pbtxt
-mv models/python_3_6/python_3_6_environment.tar.gz models
 sed -i "s/\$\$TRITON_MODEL_DIRECTORY\/python_3_12_environment/s3:\/\/triton-bucket-${CI_JOB_ID}\/python_3_12_environment/" models/python_3_12/config.pbtxt
 mv models/python_3_12/python_3_12_environment.tar.gz models
 
@@ -238,7 +232,7 @@ fi
 kill_server
 
 set +e
-for EXPECTED_VERSION_STRING in "$PY36_VERSION_STRING" "$PY312_VERSION_STRING"; do
+for EXPECTED_VERSION_STRING in "$PY312_VERSION_STRING"; do
     grep "$EXPECTED_VERSION_STRING" $SERVER_LOG
     if [ $? -ne 0 ]; then
         cat $SERVER_LOG
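The grep targets above are exact-match sentences emitted by the python_version test model. A hedged sketch of how such a string is presumably assembled (the real model lives under qa/python_models/python_version and may differ in detail):

```python
import sys

import numpy
import torch

# Must match PY312_VERSION_STRING byte-for-byte, which is why both the
# conda package rename (torch -> pytorch) and the TORCH_VERSION variable
# spelling in the diff above matter for the log check.
print(
    f"Python version is {sys.version_info.major}.{sys.version_info.minor}, "
    f"NumPy version is {numpy.__version__}, "
    f"and PyTorch version is {torch.__version__}",
    flush=True,
)
```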
4 changes: 2 additions & 2 deletions qa/L0_backend_python/examples/test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -69,7 +69,7 @@ pip3 install validators
 # Install JAX
 # Jax has dropped the support for Python 3.8. See https://jax.readthedocs.io/en/latest/changelog.html
 if [ "$TEST_JETSON" == "0" ] && [ ${PYTHON_ENV_VERSION} != "8" ]; then
-    pip3 install --upgrade "jax[cuda12_local]" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
+    pip install -U "jax[cuda12]"
 fi
 
 git clone ${TRITON_REPO_ORGANIZATION}/python_backend -b $PYTHON_BACKEND_REPO_TAG
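The switch from `jax[cuda12_local]` plus a find-links URL to plain `jax[cuda12]` relies on JAX's pip-distributed CUDA wheels, so no local CUDA find-links index is needed. A quick sanity check one could run afterwards (not part of the test itself):

```python
import jax

# On a GPU machine this should list CudaDevice entries; if JAX fell back
# to the CPU-only wheels, the list will contain only CpuDevice.
print(jax.devices())
```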
2 changes: 1 addition & 1 deletion qa/L0_backend_python/setup_python_enviroment.sh
@@ -56,7 +56,7 @@ conda update -n base -c defaults conda -y
 # been setup correctly.
 if [ ${PYTHON_ENV_VERSION} = "11" ]; then
     create_conda_env "3.11" "python-3-11"
-    conda install torch=2.6.0 -y
+    conda install pytorch=2.6.0 -y
     conda install -c conda-forge libstdcxx-ng=14 -y
     conda install numpy=1.23.5 -y
     EXPECTED_VERSION_STRING="Python version is 3.11, NumPy version is 1.23.5, and PyTorch version is 2.6.0"
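The `torch` to `pytorch` rename matters because the conda package is published as `pytorch`, while the import (and pip) name is `torch`. A quick post-install check, assuming the 2.6.0 pin above:

```python
import torch

# The conda package is "pytorch" but the module is still imported as
# "torch"; verify the pinned version actually landed in the environment.
assert torch.__version__.startswith("2.6"), torch.__version__
print("PyTorch", torch.__version__, "OK")
```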
2 changes: 1 addition & 1 deletion qa/L0_infer/test.sh
@@ -73,7 +73,7 @@ fi
 if [ "$TEST_SYSTEM_SHARED_MEMORY" -eq 1 ] || [ "$TEST_CUDA_SHARED_MEMORY" -eq 1 ]; then
     EXPECTED_NUM_TESTS=${EXPECTED_NUM_TESTS:="33"}
 else
-    EXPECTED_NUM_TESTS=${EXPECTED_NUM_TESTS:="44"}
+    EXPECTED_NUM_TESTS=${EXPECTED_NUM_TESTS:="46"}
 fi
 
 TEST_JETSON=${TEST_JETSON:=0}
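The `${EXPECTED_NUM_TESTS:="46"}` idiom keeps any value exported by the caller and only falls back to the new default of 46 (up from 44, presumably reflecting two newly added test cases). In Python terms, roughly:

```python
import os

# Caller-provided overrides win; otherwise use the new non-shared-memory
# default of 46. (The shell form also replaces an empty value, which
# os.environ.get does not; close enough for illustration.)
expected_num_tests = int(os.environ.get("EXPECTED_NUM_TESTS", "46"))
print(expected_num_tests)
```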