
Commit 1f6aed2

mc-nv, nv-kmcgill53, ziqifan617, and yinggeh authored
TPRD-1326: merge kmcgill-remove-tf (#8113)
Co-authored-by: Kyle McGill <kmcgill@nvidia.com>
Co-authored-by: Ziqi Fan <ziqif@nvidia.com>
Co-authored-by: Yingge He <yinggeh@nvidia.com>
Co-authored-by: Yingge He <157551214+yinggeh@users.noreply.github.com>
1 parent 3bca828 commit 1f6aed2

File tree: 231 files changed, +1196 additions, −7972 deletions


Dockerfile.QA

Lines changed: 19 additions & 12 deletions
@@ -61,6 +61,7 @@ RUN apt-get update && \
             python3-pip \
             python3-wheel \
             python3-setuptools \
+            python3-venv \
             rapidjson-dev \
             software-properties-common && \
     rm -rf /var/lib/apt/lists/*
@@ -74,12 +75,19 @@ RUN apt update -q=2 \
     && apt-get install -y --no-install-recommends cmake=3.28.3* cmake-data=3.28.3*
 
 # Add inception_graphdef model to example repo
+# FIXME: This should be changed to using the fetch_models.sh script
+# in order to ensure the public facing docs are up-to-date.
 WORKDIR /workspace/docs/examples/model_repository
-RUN mkdir -p inception_graphdef/1 && \
-    wget -O ${TRITONTMP_DIR}/inception_v3_2016_08_28_frozen.pb.tar.gz \
-        https://storage.googleapis.com/download.tensorflow.org/models/inception_v3_2016_08_28_frozen.pb.tar.gz && \
-    (cd ${TRITONTMP_DIR} && tar xzf inception_v3_2016_08_28_frozen.pb.tar.gz) && \
-    mv ${TRITONTMP_DIR}/inception_v3_2016_08_28_frozen.pb inception_graphdef/1/model.graphdef
+RUN mkdir -p model_repository/inception_onnx/1 && \
+    wget -O /tmp/inception_v3_2016_08_28_frozen.pb.tar.gz \
+        https://storage.googleapis.com/download.tensorflow.org/models/inception_v3_2016_08_28_frozen.pb.tar.gz && \
+    (cd /tmp && tar xzf inception_v3_2016_08_28_frozen.pb.tar.gz) && \
+    python3 -m venv tf2onnx && \
+    source ./tf2onnx/bin/activate && \
+    pip3 install "numpy<2" tensorflow tf2onnx && \
+    python3 -m tf2onnx.convert --graphdef /tmp/inception_v3_2016_08_28_frozen.pb --output inception_v3_onnx.model.onnx --inputs input:0 --outputs InceptionV3/Predictions/Softmax:0 && \
+    deactivate && \
+    mv inception_v3_onnx.model.onnx model_repository/inception_onnx/1/model.onnx
 
 # Update the qa/ directory with test executables, models, etc.
 WORKDIR /workspace
@@ -109,7 +117,7 @@ RUN mkdir -p qa/common && \
     cp -r docs/examples/model_repository/simple_identity qa/L0_grpc/models && \
     cp -r docs/examples/model_repository/simple_sequence qa/L0_grpc/models && \
     cp -r docs/examples/model_repository/simple_string qa/L0_grpc/models && \
-    cp -r docs/examples/model_repository/inception_graphdef qa/L0_grpc/models && \
+    cp -r docs/examples/model_repository/inception_onnx qa/L0_grpc/models && \
     mkdir qa/L0_grpc_state_cleanup/models && \
     cp -r /workspace/src/test/models/repeat_int32 qa/L0_grpc_state_cleanup/models/ && \
     mkdir qa/L0_http/models && \
@@ -118,7 +126,7 @@ RUN mkdir -p qa/common && \
     cp -r docs/examples/model_repository/simple_identity qa/L0_http/models && \
     cp -r docs/examples/model_repository/simple_sequence qa/L0_http/models && \
     cp -r docs/examples/model_repository/simple_string qa/L0_http/models && \
-    cp -r docs/examples/model_repository/inception_graphdef qa/L0_http/models && \
+    cp -r docs/examples/model_repository/inception_onnx qa/L0_grpc/models && \
     mkdir qa/L0_https/models && \
     cp -r docs/examples/model_repository/simple qa/L0_https/models/. && \
     mkdir qa/L0_secure_grpc/models && \
@@ -149,21 +157,20 @@ RUN mkdir -p qa/common && \
     cp bin/triton_json_test qa/L0_json/. && \
     cp bin/backend_output_detail_test qa/L0_backend_output_detail/. && \
     cp -r deploy/mlflow-triton-plugin qa/L0_mlflow/. && \
-    cp bin/input_byte_size_test qa/L0_input_validation/. && \
-    cp -r docs/examples/model_repository/simple_identity qa/L0_input_validation/models
+    cp bin/input_byte_size_test qa/L0_input_validation/.
 
 RUN mkdir -p qa/pkgs && \
     cp python/triton*.whl qa/pkgs/. && \
     cp -rf python/test/. qa/L0_python_api/.
 
 RUN mkdir -p qa/L0_simple_ensemble/models/simple/1 && \
-    cp docs/examples/model_repository/simple/1/model.graphdef \
+    cp docs/examples/model_repository/simple/1/model.onnx \
        qa/L0_simple_ensemble/models/simple/1/. && \
     mkdir -p qa/L0_simple_ensemble/models/simple/2 && \
-    cp docs/examples/model_repository/simple/1/model.graphdef \
+    cp docs/examples/model_repository/simple/1/model.onnx \
       qa/L0_simple_ensemble/models/simple/2/. && \
     mkdir -p qa/L0_socket/models/simple/1 && \
-    cp docs/examples/model_repository/simple/1/model.graphdef \
+    cp docs/examples/model_repository/simple/1/model.onnx \
       qa/L0_socket/models/simple/1/.
 
 RUN mkdir -p qa/L0_backend_identity/models && \
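The new RUN step above performs the GraphDef-to-ONNX conversion with the tf2onnx CLI inside a throwaway venv. For reference, a minimal sketch of the same conversion through tf2onnx's Python API is below; it is illustrative only (not part of the commit) and assumes `tensorflow` and `tf2onnx` are installed, with the tensor names taken from the `--inputs`/`--outputs` flags in the RUN step.

```python
# Illustrative sketch: convert the frozen Inception v3 GraphDef to ONNX
# via the tf2onnx Python API instead of `python3 -m tf2onnx.convert`.
import tensorflow as tf
import tf2onnx

graph_def = tf.compat.v1.GraphDef()
with open("/tmp/inception_v3_2016_08_28_frozen.pb", "rb") as f:
    graph_def.ParseFromString(f.read())

# Input/output names mirror the --inputs/--outputs flags used above.
tf2onnx.convert.from_graph_def(
    graph_def,
    input_names=["input:0"],
    output_names=["InceptionV3/Predictions/Softmax:0"],
    output_path="model_repository/inception_onnx/1/model.onnx",
)
```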

build.py

Lines changed: 9 additions & 41 deletions
@@ -562,8 +562,6 @@ def backend_cmake_args(images, components, be, install_dir, library_paths):
         args = onnxruntime_cmake_args(images, library_paths)
     elif be == "openvino":
         args = openvino_cmake_args()
-    elif be == "tensorflow":
-        args = tensorflow_cmake_args(images, library_paths)
     elif be == "python":
         args = python_cmake_args()
     elif be == "dali":
@@ -795,23 +793,6 @@ def tensorrt_cmake_args():
     return cargs
 
 
-def tensorflow_cmake_args(images, library_paths):
-    backend_name = "tensorflow"
-    extra_args = []
-
-    # If a specific TF image is specified use it, otherwise pull from NGC.
-    if backend_name in images:
-        image = images[backend_name]
-    else:
-        image = "nvcr.io/nvidia/tensorflow:{}-tf2-py3".format(
-            FLAGS.upstream_container_version
-        )
-    extra_args = [
-        cmake_backend_arg(backend_name, "TRITON_TENSORFLOW_DOCKER_IMAGE", None, image)
-    ]
-    return extra_args
-
-
 def dali_cmake_args():
     return [
         cmake_backend_enable("dali", "TRITON_DALI_SKIP_DOWNLOAD", False),
@@ -1233,10 +1214,10 @@ def create_dockerfile_linux(
         argmap["BASE_IMAGE"],
     )
 
-    # PyTorch and TensorFlow backends need extra CUDA and other
+    # PyTorch backends need extra CUDA and other
     # dependencies during runtime that are missing in the CPU-only base container.
     # These dependencies must be copied from the Triton Min image.
-    if not FLAGS.enable_gpu and (("pytorch" in backends) or ("tensorflow" in backends)):
+    if not FLAGS.enable_gpu and ("pytorch" in backends):
         df += """
 ############################################################################
 ## Triton Min image
@@ -1602,10 +1583,10 @@ def add_cpu_libs_to_linux_dockerfile(backends, target_machine):
         cuda_arch=cuda_arch, libs_arch=libs_arch
     )
 
-    if ("pytorch" in backends) or ("tensorflow" in backends):
-        # Add NCCL dependency for tensorflow/pytorch backend.
+    if "pytorch" in backends:
+        # Add NCCL dependency for pytorch backend.
         # Note: Even though the build is CPU-only, the version of
-        # tensorflow/pytorch we are using depends upon the NCCL library.
+        # pytorch we are using depends upon the NCCL library.
         # Since this dependency is not present in the ubuntu base image,
         # we must copy it from the Triton min container ourselves.
         df += """
@@ -1720,11 +1701,10 @@ def create_build_dockerfiles(
     }
 
     # For CPU-only image we need to copy some cuda libraries and dependencies
-    # since we are using PyTorch and TensorFlow containers that
-    # are not CPU-only.
+    # since we are using PyTorch containers that are not CPU-only.
     if (
         not FLAGS.enable_gpu
-        and (("pytorch" in backends) or ("tensorflow" in backends))
+        and ("pytorch" in backends)
         and (target_platform() != "windows")
     ):
         if "gpu-base" in images:
@@ -2351,7 +2331,6 @@ def enable_all():
         "identity",
         "square",
         "repeat",
-        "tensorflow",
         "onnxruntime",
         "python",
         "dali",
@@ -2586,7 +2565,7 @@ def enable_all():
         "--image",
         action="append",
         required=False,
-        help='Use specified Docker image in build as <image-name>,<full-image-name>. <image-name> can be "base", "gpu-base", "tensorflow", or "pytorch".',
+        help='Use specified Docker image in build as <image-name>,<full-image-name>. <image-name> can be "base", "gpu-base", or "pytorch".',
     )
 
     parser.add_argument(
@@ -2887,12 +2866,6 @@ def enable_all():
         parts = be.split(":")
         if len(parts) == 1:
             parts.append(default_repo_tag)
-        if parts[0] == "tensorflow1":
-            fail(
-                "Starting from Triton version 23.04, support for TensorFlow 1 has been discontinued. Please switch to Tensorflow 2."
-            )
-        if parts[0] == "tensorflow2":
-            parts[0] = "tensorflow"
         log('backend "{}" at tag/branch "{}"'.format(parts[0], parts[1]))
         backends[parts[0]] = parts[1]
 
@@ -2939,13 +2912,10 @@ def enable_all():
             len(parts) != 2, "--image must specify <image-name>,<full-image-registry>"
         )
         fail_if(
-            parts[0]
-            not in ["base", "gpu-base", "pytorch", "tensorflow", "tensorflow2"],
+            parts[0] not in ["base", "gpu-base", "pytorch"],
             "unsupported value for --image",
         )
         log('image "{}": "{}"'.format(parts[0], parts[1]))
-        if parts[0] == "tensorflow2":
-            parts[0] = "tensorflow"
         images[parts[0]] = parts[1]
 
     # Initialize map of library paths for each backend.
@@ -2954,8 +2924,6 @@ def enable_all():
         parts = lpath.split(":")
         if len(parts) == 2:
             log('backend "{}" library path "{}"'.format(parts[0], parts[1]))
-            if parts[0] == "tensorflow2":
-                parts[0] = "tensorflow"
            library_paths[parts[0]] = parts[1]
 
     # Parse any explicitly specified cmake arguments
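With the tensorflow1/tensorflow2 aliases gone, build.py's backend parsing reduces to a plain name:tag split. A condensed, hypothetical standalone rendering of the loop that remains (the default tag value here is illustrative):

```python
# Hypothetical standalone form of build.py's remaining backend parsing:
# any "tensorflow*" entry now falls through as an ordinary backend name
# instead of being aliased to "tensorflow" or rejected with a TF message.
def parse_backends(backend_flags, default_repo_tag="main"):
    backends = {}
    for be in backend_flags:
        parts = be.split(":")
        if len(parts) == 1:
            parts.append(default_repo_tag)  # bare names get the default tag
        backends[parts[0]] = parts[1]
    return backends

print(parse_backends(["onnxruntime", "python:r25.02"]))
# -> {'onnxruntime': 'main', 'python': 'r25.02'}
```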

compose.py

Lines changed: 5 additions & 13 deletions
@@ -71,14 +71,10 @@ def start_dockerfile(ddir, images, argmap, dockerfile_name, backends):
         argmap["TRITON_VERSION"], argmap["TRITON_CONTAINER_VERSION"], images["full"]
     )
 
-    # PyTorch, TensorFlow backends need extra CUDA and other
+    # PyTorch backends need extra CUDA and other
     # dependencies during runtime that are missing in the CPU-only base container.
     # These dependencies must be copied from the Triton Min image.
-    if not FLAGS.enable_gpu and (
-        ("pytorch" in backends)
-        or ("tensorflow" in backends)
-        or ("tensorflow2" in backends)
-    ):
+    if not FLAGS.enable_gpu and "pytorch" in backends:
         df += """
 FROM {} AS min_container
 
@@ -406,7 +402,7 @@ def create_argmap(images, skip_pull):
         '<image-name>,<full-image-name>. <image-name> can be "min", "gpu-min" '
         'or "full". Both "min" and "full" need to be specified at the same time.'
         'This will override "--container-version". "gpu-min" is needed for '
-        "CPU-only container to copy TensorFlow and PyTorch deps.",
+        "CPU-only container to copy PyTorch deps.",
     )
     parser.add_argument(
         "--enable-gpu",
@@ -504,13 +500,9 @@ def create_argmap(images, skip_pull):
     fail_if(len(images) < 2, "Need to specify both 'full' and 'min' images if at all")
 
     # For CPU-only image we need to copy some cuda libraries and dependencies
-    # since we are using PyTorch, TensorFlow 1, TensorFlow 2 containers that
+    # since we are using PyTorch containers that
     # are not CPU-only.
-    if (
-        ("pytorch" in FLAGS.backend)
-        or ("tensorflow" in FLAGS.backend)
-        or ("tensorflow2" in FLAGS.backend)
-    ) and ("gpu-min" not in images):
+    if ("pytorch" in FLAGS.backend) and ("gpu-min" not in images):
         images["gpu-min"] = "nvcr.io/nvidia/tritonserver:{}-py3-min".format(
             FLAGS.container_version
         )
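The surviving gpu-min defaulting in compose.py is now a single PyTorch check. A hypothetical standalone rendering, with an illustrative container version, not part of the commit:

```python
# Hypothetical standalone form of compose.py's simplified logic: only the
# pytorch backend still triggers pulling the GPU min image for CPU-only builds.
def default_gpu_min(backend_flags, images, container_version):
    if ("pytorch" in backend_flags) and ("gpu-min" not in images):
        images["gpu-min"] = "nvcr.io/nvidia/tritonserver:{}-py3-min".format(
            container_version
        )
    return images

print(default_gpu_min(["pytorch"], {}, "25.02"))
# -> {'gpu-min': 'nvcr.io/nvidia/tritonserver:25.02-py3-min'}
```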

deploy/alibaba-cloud/README.md

Lines changed: 5 additions & 5 deletions
@@ -1,5 +1,5 @@
 <!--
-# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -39,7 +39,7 @@ This repository contains information about how to deploy NVIDIA Triton Inference
 - EAS provides a simple way for deep learning developers to deploy their models in Alibaba Cloud.
 - Using **Triton Processor** is the recommended way on EAS to deploy Triton Inference Server. Users can simply deploy a Triton Server by preparing models and creating a EAS service by setting processor type to `triton`.
 - Models should be uploaded to Alibaba Cloud's OSS(Object Storage Service). User's model repository in OSS will be mounted onto local path visible to Triton Server.
-- This documentation uses Triton's own example models for demo. The tensorflow inception model can be downloaded by the `fetch_models.sh` script.
+- This documentation uses Triton's own example models for demo. The ONNX inception v3 model can be obtained by the `fetch_models.sh` script.
 
 # Prerequisites
 - You should register an Alibaba Cloud Account, and being able to use EAS by [eascmd](https://help.aliyun.com/document_detail/111031.html?spm=a2c4g.11186623.6.752.42356f46FN5fU1), which is a command line tool to create stop or scale services on EAS.
@@ -48,10 +48,10 @@ This repository contains information about how to deploy NVIDIA Triton Inference
 
 # Demo Instruction
 ## Prepare a model repo directory in OSS
-Download the tensorflow inception model via [fetch_model.sh](https://github.com/triton-inference-server/server/blob/main/docs/examples/fetch_models.sh). Then using [ossutil](https://help.aliyun.com/document_detail/50452.html?spm=a2c4g.11186623.6.833.26d66d51dPEytI) , which is a command line tool to use OSS, to upload the model to a certain OSS dir as you want.
+Download the ONNX inception v3 model via [fetch_model.sh](https://github.com/triton-inference-server/server/blob/main/docs/examples/fetch_models.sh). Then using [ossutil](https://help.aliyun.com/document_detail/50452.html?spm=a2c4g.11186623.6.833.26d66d51dPEytI) , which is a command line tool to use OSS, to upload the model to a certain OSS dir as you want.
 
 ```
-./ossutil cp inception_graphdef/ oss://triton-model-repo/models
+./ossutil cp inception_v3_onnx/ oss://triton-model-repo/models
 ```
 ## Create Triton Service with json config by eascmd
 The following is the json we use when creating a Triton Server on EAS.
@@ -125,7 +125,7 @@ triton_client = httpclient.InferenceServerClient(url=URL, verbose=False)
 start = time.time()
 for i in range(10):
     results = triton_client.infer(
-        "inception_graphdef", inputs=[input_img], outputs=[output], headers=HEADERS
+        "inception_v3_onnx", inputs=[input_img], outputs=[output], headers=HEADERS
     )
     res_body = results.get_response()
     elapsed_ms = (time.time() - start) * 1000
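For context, a self-contained sketch of a client call against the renamed model follows. This is not from the README; the tensor names and shape ("input:0", "InceptionV3/Predictions/Softmax:0", 1x299x299x3) are assumptions carried over from the tf2onnx conversion flags elsewhere in this commit.

```python
# Hedged sketch: infer against the renamed inception_v3_onnx model over HTTP.
# Assumes a local Triton server and that tf2onnx preserved the TF tensor names.
import numpy as np
import tritonclient.http as httpclient

client = httpclient.InferenceServerClient(url="localhost:8000")

# Inception v3's frozen graph takes a 1x299x299x3 float image (assumed here);
# a random tensor stands in for a real preprocessed image.
inp = httpclient.InferInput("input:0", [1, 299, 299, 3], "FP32")
inp.set_data_from_numpy(np.random.rand(1, 299, 299, 3).astype(np.float32))
out = httpclient.InferRequestedOutput("InceptionV3/Predictions/Softmax:0")

results = client.infer("inception_v3_onnx", inputs=[inp], outputs=[out])
print(results.as_numpy("InceptionV3/Predictions/Softmax:0").shape)
```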

deploy/aws/README.md

Lines changed: 2 additions & 2 deletions
@@ -1,5 +1,5 @@
 <!--
-# Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2018-2025, NVIDIA CORPORATION. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -224,7 +224,7 @@ using image classification models being served by the inference
 server. For example,
 
 ```
-$ image_client -u 34.83.9.133:8000 -m inception_graphdef -s INCEPTION -c3 mug.jpg
+$ image_client -u 34.83.9.133:8000 -m inception_v3_onnx -s INCEPTION -c3 mug.jpg
 Request 0, batch size 1
 Image 'images/mug.jpg':
     504 (COFFEE MUG) = 0.723992

deploy/gcp/README.md

Lines changed: 2 additions & 2 deletions
@@ -1,5 +1,5 @@
 <!--
-# Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2018-2025, NVIDIA CORPORATION. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -262,7 +262,7 @@ using image classification models being served by the inference
 server. For example,
 
 ```
-$ image_client -u 34.83.9.133:8000 -m inception_graphdef -s INCEPTION -c3 mug.jpg
+$ image_client -u 34.83.9.133:8000 -m inception_v3_onnx -s INCEPTION -c3 mug.jpg
 Request 0, batch size 1
 Image 'images/mug.jpg':
     504 (COFFEE MUG) = 0.723992

deploy/k8s-onprem/README.md

Lines changed: 1 addition & 1 deletion
@@ -303,7 +303,7 @@ using image classification models on the inference
 server. For example,
 
 ```
-$ image_client -u $cluster_ip:8000 -m inception_graphdef -s INCEPTION -c3 mug.jpg
+$ image_client -u $cluster_ip:8000 -m inception_v3_onnx -s INCEPTION -c3 mug.jpg
 Request 0, batch size 1
 Image 'images/mug.jpg':
     504 (COFFEE MUG) = 0.723992

deploy/oci/README.md

Lines changed: 2 additions & 2 deletions
@@ -1,5 +1,5 @@
 <!--
-# Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2018-2025, NVIDIA CORPORATION. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -268,7 +268,7 @@ using image classification models being served by the inference
 server. For example,
 
 ```
-$ image_client -u 34.83.9.133:8000 -m inception_graphdef -s INCEPTION -c3 mug.jpg
+$ image_client -u 34.83.9.133:8000 -m inception_v3_onnx -s INCEPTION -c3 mug.jpg
 Request 0, batch size 1
 Image 'images/mug.jpg':
     504 (COFFEE MUG) = 0.723992

docs/examples/fetch_models.sh

Lines changed: 12 additions & 4 deletions
@@ -1,5 +1,6 @@
 #!/bin/bash
-# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+
+# Copyright (c) 2018-2025, NVIDIA CORPORATION. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -27,12 +28,19 @@
 
 set -ex
 
-# TensorFlow inception
-mkdir -p model_repository/inception_graphdef/1
+# Convert Tensorflow inception V3 module to ONNX
+# Pre-requisite: Python3, venv, and Pip3 are installed on the system
+mkdir -p model_repository/inception_onnx/1
 wget -O /tmp/inception_v3_2016_08_28_frozen.pb.tar.gz \
     https://storage.googleapis.com/download.tensorflow.org/models/inception_v3_2016_08_28_frozen.pb.tar.gz
 (cd /tmp && tar xzf inception_v3_2016_08_28_frozen.pb.tar.gz)
-mv /tmp/inception_v3_2016_08_28_frozen.pb model_repository/inception_graphdef/1/model.graphdef
+python3 -m venv tf2onnx
+source ./tf2onnx/bin/activate
+pip3 install "numpy<2" tensorflow tf2onnx
+python3 -m tf2onnx.convert --graphdef /tmp/inception_v3_2016_08_28_frozen.pb --output inception_v3_onnx.model.onnx --inputs input:0 --outputs InceptionV3/Predictions/Softmax:0
+deactivate
+mv inception_v3_onnx.model.onnx model_repository/inception_onnx/1/model.onnx
+
 
 # ONNX densenet
 mkdir -p model_repository/densenet_onnx/1
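After fetch_models.sh runs, the converted artifact can be sanity-checked with the onnx package. A hedged sketch, not part of the script, assuming `onnx` is installed:

```python
# Hedged sketch: verify the converted Inception v3 ONNX file is well-formed.
# The path matches the mv target in fetch_models.sh.
import onnx

model = onnx.load("model_repository/inception_onnx/1/model.onnx")
onnx.checker.check_model(model)  # raises if the graph is structurally invalid
print("inputs:", [i.name for i in model.graph.input])
print("outputs:", [o.name for o in model.graph.output])
```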

docs/examples/model_repository/inception_graphdef/config.pbtxt

Lines changed: 0 additions & 19 deletions
This file was deleted.
