Skip to content

Commit a61df46

Browse files
committed
Update versions
1 parent f35f9a6 commit a61df46

File tree

19 files changed

+27
-27
lines changed

19 files changed

+27
-27
lines changed

Dockerfile.sdk

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
#
3030

3131
# Base image on the minimum Triton container
32-
ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:25.01-py3-min
32+
ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:25.02-py3-min
3333

3434
ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo
3535
ARG TRITON_PA_REPO_SUBDIR=perfanalyzerrepo

TRITON_VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.55.0dev
1+
2.55.0

deploy/aws/values.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
replicaCount: 1
2828

2929
image:
30-
imageName: nvcr.io/nvidia/tritonserver:25.01-py3
30+
imageName: nvcr.io/nvidia/tritonserver:25.02-py3
3131
pullPolicy: IfNotPresent
3232
modelRepositoryPath: s3://triton-inference-server-repository/model_repository
3333
numGpus: 1

deploy/fleetcommand/Chart.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626

2727
apiVersion: v1
2828
# appVersion is the Triton version; update when changing release
29-
appVersion: "2.54.0"
29+
appVersion: "2.55.0"
3030
description: Triton Inference Server (Fleet Command)
3131
name: triton-inference-server
3232
# version is the Chart version; update when changing anything in the chart

deploy/fleetcommand/values.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
replicaCount: 1
2828

2929
image:
30-
imageName: nvcr.io/nvidia/tritonserver:25.01-py3
30+
imageName: nvcr.io/nvidia/tritonserver:25.02-py3
3131
pullPolicy: IfNotPresent
3232
numGpus: 1
3333
serverCommand: tritonserver
@@ -47,13 +47,13 @@ image:
4747
#
4848
# To set model control mode, uncomment and configure below
4949
# TODO: Fix the following url, it is invalid
50-
# See https://github.com/triton-inference-server/server/blob/r25.01/docs/model_management.md
50+
# See https://github.com/triton-inference-server/server/blob/r25.02/docs/model_management.md
5151
# for more details
5252
#- --model-control-mode=explicit|poll|none
5353
#
5454
# Additional server args
5555
#
56-
# see https://github.com/triton-inference-server/server/blob/r25.01/README.md
56+
# see https://github.com/triton-inference-server/server/blob/r25.02/README.md
5757
# for more details
5858

5959
service:

deploy/gcp/values.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
replicaCount: 1
2828

2929
image:
30-
imageName: nvcr.io/nvidia/tritonserver:25.01-py3
30+
imageName: nvcr.io/nvidia/tritonserver:25.02-py3
3131
pullPolicy: IfNotPresent
3232
modelRepositoryPath: gs://triton-inference-server-repository/model_repository
3333
numGpus: 1

deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ metadata:
3333
namespace: default
3434
spec:
3535
containers:
36-
- image: nvcr.io/nvidia/tritonserver:25.01-py3-sdk
36+
- image: nvcr.io/nvidia/tritonserver:25.02-py3-sdk
3737
imagePullPolicy: Always
3838
name: nv-triton-client
3939
securityContext:

deploy/gke-marketplace-app/server-deployer/build_and_push.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@
2828
export REGISTRY=gcr.io/$(gcloud config get-value project | tr ':' '/')
2929
export APP_NAME=tritonserver
3030
export MAJOR_VERSION=2.53
31-
export MINOR_VERSION=2.54.0
32-
export NGC_VERSION=25.01-py3
31+
export MINOR_VERSION=2.55.0
32+
export NGC_VERSION=25.02-py3
3333

3434
docker pull nvcr.io/nvidia/$APP_NAME:$NGC_VERSION
3535

deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,4 +28,4 @@ apiVersion: v1
2828
appVersion: "2.53"
2929
description: Triton Inference Server
3030
name: triton-inference-server
31-
version: 2.54.0
31+
version: 2.55.0

deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,14 @@ maxReplicaCount: 3
3131
tritonProtocol: HTTP
3232
# HPA GPU utilization autoscaling target
3333
HPATargetAverageValue: 85
34-
modelRepositoryPath: gs://triton_sample_models/25.01
35-
publishedVersion: '2.54.0'
34+
modelRepositoryPath: gs://triton_sample_models/25.02
35+
publishedVersion: '2.55.0'
3636
gcpMarketplace: true
3737

3838
image:
3939
registry: gcr.io
4040
repository: nvidia-ngc-public/tritonserver
41-
tag: 25.01-py3
41+
tag: 25.02-py3
4242
pullPolicy: IfNotPresent
4343
# modify the model repository here to match your GCP storage bucket
4444
numGpus: 1

deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
x-google-marketplace:
2828
schemaVersion: v2
2929
applicationApiVersion: v1beta1
30-
publishedVersion: '2.54.0'
30+
publishedVersion: '2.55.0'
3131
publishedVersionMetadata:
3232
releaseNote: >-
3333
Initial release.

deploy/gke-marketplace-app/server-deployer/schema.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
x-google-marketplace:
2828
schemaVersion: v2
2929
applicationApiVersion: v1beta1
30-
publishedVersion: '2.54.0'
30+
publishedVersion: '2.55.0'
3131
publishedVersionMetadata:
3232
releaseNote: >-
3333
Initial release.
@@ -89,7 +89,7 @@ properties:
8989
modelRepositoryPath:
9090
type: string
9191
title: Bucket where models are stored. Please make sure the user/service account to create the GKE app has permission to this GCS bucket. Read Triton documentation on configs and formatting details, supporting TensorRT, TensorFlow, Pytorch, Onnx ... etc.
92-
default: gs://triton_sample_models/25.01
92+
default: gs://triton_sample_models/25.02
9393
image.ldPreloadPath:
9494
type: string
9595
title: Leave this empty by default. Triton allows users to create custom layers for backend such as TensorRT plugin or Tensorflow custom ops, the compiled shared library must be provided via LD_PRELOAD environment variable.

deploy/gke-marketplace-app/trt-engine/README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
```
3434
docker run --gpus all -it --network host \
3535
--shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 \
36-
-v ~:/scripts nvcr.io/nvidia/tensorrt:25.01-py3
36+
-v ~:/scripts nvcr.io/nvidia/tensorrt:25.02-py3
3737
3838
pip install onnx six torch tf2onnx tensorflow
3939
@@ -57,7 +57,7 @@ mkdir -p engines
5757
5858
python3 builder.py -m models/fine-tuned/bert_tf_ckpt_large_qa_squad2_amp_128_v19.03.1/model.ckpt -o engines/bert_large_int8_bs1_s128.engine -b 1 -s 128 -c models/fine-tuned/bert_tf_ckpt_large_qa_squad2_amp_128_v19.03.1/ -v models/fine-tuned/bert_tf_ckpt_large_qa_squad2_amp_128_v19.03.1/vocab.txt --int8 --fp16 --strict --calib-num 1 -iln -imh
5959
60-
gsutil cp bert_large_int8_bs1_s128.engine gs://triton_sample_models/25.01/bert/1/model.plan
60+
gsutil cp bert_large_int8_bs1_s128.engine gs://triton_sample_models/25.02/bert/1/model.plan
6161
```
6262

63-
For each Triton upgrade, container version used to generate the model, and the model path in GCS `gs://triton_sample_models/25.01/` should be updated accordingly with the correct version.
63+
For each Triton upgrade, container version used to generate the model, and the model path in GCS `gs://triton_sample_models/25.02/` should be updated accordingly with the correct version.

deploy/k8s-onprem/values.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ tags:
2929
loadBalancing: true
3030

3131
image:
32-
imageName: nvcr.io/nvidia/tritonserver:25.01-py3
32+
imageName: nvcr.io/nvidia/tritonserver:25.02-py3
3333
pullPolicy: IfNotPresent
3434
modelRepositoryServer: < Replace with the IP Address of your file server >
3535
modelRepositoryPath: /srv/models

deploy/oci/values.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
replicaCount: 1
2828

2929
image:
30-
imageName: nvcr.io/nvidia/tritonserver:25.01-py3
30+
imageName: nvcr.io/nvidia/tritonserver:25.02-py3
3131
pullPolicy: IfNotPresent
3232
modelRepositoryPath: s3://https://<OCI_NAMESPACE>.compat.objectstorage.<OCI_REGION>.oraclecloud.com:443/triton-inference-server-repository
3333
numGpus: 1

python/openai/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@
5151
docker run -it --net=host --gpus all --rm \
5252
-v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
5353
-e HF_TOKEN \
54-
nvcr.io/nvidia/tritonserver:25.01-vllm-python-py3
54+
nvcr.io/nvidia/tritonserver:25.02-vllm-python-py3
5555
```
5656

5757
2. Launch the OpenAI-compatible Triton Inference Server:

qa/common/gen_jetson_trt_models

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
# Make all generated files accessible outside of container
3535
umask 0000
3636
# Set the version of the models
37-
TRITON_VERSION=${TRITON_VERSION:=25.01}
37+
TRITON_VERSION=${TRITON_VERSION:=25.02}
3838
# Set the CUDA device to use
3939
CUDA_DEVICE=${RUNNER_ID:=0}
4040
# Set TensorRT image

qa/common/gen_qa_custom_ops

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
##
3838
############################################################################
3939

40-
TRITON_VERSION=${TRITON_VERSION:=25.01}
40+
TRITON_VERSION=${TRITON_VERSION:=25.02}
4141
NVIDIA_UPSTREAM_VERSION=${NVIDIA_UPSTREAM_VERSION:=$TRITON_VERSION}
4242
TENSORFLOW_IMAGE=${TENSORFLOW_IMAGE:=nvcr.io/nvidia/tensorflow:$NVIDIA_UPSTREAM_VERSION-tf2-py3}
4343
PYTORCH_IMAGE=${PYTORCH_IMAGE:=nvcr.io/nvidia/pytorch:$NVIDIA_UPSTREAM_VERSION-py3}

qa/common/gen_qa_model_repository

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@
4848
##
4949
############################################################################
5050

51-
TRITON_VERSION=${TRITON_VERSION:=25.01}
51+
TRITON_VERSION=${TRITON_VERSION:=25.02}
5252

5353
# ONNX. Use ONNX_OPSET 0 to use the default for ONNX version
5454
ONNX_VERSION=1.16.1

0 commit comments

Comments (0)