1
1
#! /bin/bash
2
- # Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
2
+ # Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.
3
3
#
4
4
# Redistribution and use in source and binary forms, with or without
5
5
# modification, are permitted provided that the following conditions
@@ -38,6 +38,9 @@ if [ ! -z "$TEST_REPO_ARCH" ]; then
38
38
REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
39
39
fi
40
40
41
+ # Use "--request-count" throughout the test to bypass PA stability criteria and
42
+ # reduce flaky failures from PA unstable measurements.
43
+ REQUEST_COUNT=10
41
44
CLIENT=../clients/perf_client
42
45
# Only use libtorch as it accepts GPU I/O and it can handle variable shape
43
46
BACKENDS=${BACKENDS:="libtorch"}
@@ -91,7 +94,7 @@ for BACKEND in $BACKENDS; do
91
94
92
95
# Sanity check that the server allocates pinned memory for large size
93
96
set +e
94
- $CLIENT -m${ENSEMBLE_NAME} --shape INPUT0:16777216
97
+ $CLIENT -m${ENSEMBLE_NAME} --shape INPUT0:16777216 --request-count ${REQUEST_COUNT}
95
98
if (( $? != 0 )) ; then
96
99
RET=1
97
100
fi
@@ -128,6 +131,7 @@ for BACKEND in $BACKENDS; do
128
131
for TENSOR_SIZE in 16384 1048576 2097152 4194304 8388608 16777216; do
129
132
$CLIENT -i grpc -u localhost:8001 -m${ENSEMBLE_NAME} \
130
133
--shape INPUT0:${TENSOR_SIZE} \
134
+ --request-count ${REQUEST_COUNT} \
131
135
>> ${BACKEND}.${TENSOR_SIZE}.pinned.log 2>&1
132
136
if (( $? != 0 )) ; then
133
137
RET=1
@@ -150,7 +154,7 @@ for BACKEND in $BACKENDS; do
150
154
151
155
# Sanity check that the server allocates non-pinned memory
152
156
set +e
153
- $CLIENT -m${ENSEMBLE_NAME} --shape INPUT0:1
157
+ $CLIENT -m${ENSEMBLE_NAME} --shape INPUT0:1 --request-count ${REQUEST_COUNT}
154
158
if (( $? != 0 )) ; then
155
159
RET=1
156
160
fi
@@ -180,6 +184,7 @@ for BACKEND in $BACKENDS; do
180
184
for TENSOR_SIZE in 16384 1048576 2097152 4194304 8388608 16777216; do
181
185
$CLIENT -i grpc -u localhost:8001 -m${ENSEMBLE_NAME} \
182
186
--shape INPUT0:${TENSOR_SIZE} \
187
+ --request-count ${REQUEST_COUNT} \
183
188
>> ${BACKEND}.${TENSOR_SIZE}.nonpinned.log 2>&1
184
189
if (( $? != 0 )) ; then
185
190
RET=1
0 commit comments