
Commit 59c1842

test: Fix requested output deleting extra outputs (#7866) (#7873)

1 parent e5deee0 commit 59c1842

File tree

4 files changed: +227 -0 lines changed

qa/L0_backend_python/io/io_test.py
qa/L0_backend_python/io/requested_output_model/config.pbtxt
qa/L0_backend_python/io/requested_output_model/model.py
qa/L0_backend_python/io/test.sh

qa/L0_backend_python/io/io_test.py

Lines changed: 27 additions & 0 deletions
@@ -259,6 +259,33 @@ def test_requested_output_decoupled(self):
         self.assertTrue(np.allclose(gpu_output_data[1:], next_gpu_output_data))
         self.assertTrue(user_data._completed_requests.empty())

+    # Assert a prior crash is fixed regarding requested output on a decoupled model.
+    def test_requested_output_decoupled_prior_crash(self):
+        model_name = "llm"
+        prompt = "test"
+
+        text_input_data = np.array([[prompt]]).astype(object)
+        inputs = [grpcclient.InferInput("text_input", text_input_data.shape, "BYTES")]
+        inputs[-1].set_data_from_numpy(text_input_data)
+
+        requested_outputs = [grpcclient.InferRequestedOutput("text_output")]
+
+        user_data = UserData()
+        with grpcclient.InferenceServerClient(f"{_tritonserver_ipaddr}:8001") as client:
+            client.start_stream(callback=partial(callback, user_data))
+            client.async_stream_infer(
+                model_name=model_name, inputs=inputs, outputs=requested_outputs
+            )
+            client.stop_stream()
+
+        outputs = ""
+        while not user_data._completed_requests.empty():
+            result = user_data._completed_requests.get(block=False)
+            if isinstance(result, InferenceServerException):
+                raise result
+            outputs += str(result.as_numpy("text_output")[0], encoding="utf-8")
+        self.assertGreater(len(outputs), 0, "text_output is empty")
+

 if __name__ == "__main__":
     unittest.main()
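Note: the new test leans on helpers defined earlier in io_test.py that this hunk does not show: UserData, callback, _tritonserver_ipaddr, and the partial/InferenceServerException imports. As a hedged sketch, following the usual Triton QA pattern these names imply rather than the file's exact code, they look roughly like:

    # Sketch only: approximates helpers io_test.py defines outside this hunk.
    import os
    import queue
    from functools import partial  # used as partial(callback, user_data) above

    from tritonclient.utils import InferenceServerException

    # Server address; Triton QA scripts typically export TRITONSERVER_IPADDR.
    _tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost")


    class UserData:
        def __init__(self):
            # Holds stream results (or InferenceServerException) in arrival order.
            self._completed_requests = queue.Queue()


    def callback(user_data, result, error):
        # gRPC streaming callback: enqueue the error if one occurred, else the result.
        if error:
            user_data._completed_requests.put(error)
        else:
            user_data._completed_requests.put(result)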
qa/L0_backend_python/io/requested_output_model/config.pbtxt

Lines changed: 64 additions & 0 deletions

@@ -0,0 +1,64 @@
+# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#
+# This test case was added based on a prior crash. DO NOT MODIFY!
+#
+
+name: "llm"
+backend: "python"
+max_batch_size: 128
+
+model_transaction_policy {
+  decoupled: True
+}
+
+input [
+  {
+    name: "text_input"
+    data_type: TYPE_STRING
+    dims: [ 1 ]
+  }
+]
+output [
+  {
+    name: "text_output"
+    data_type: TYPE_STRING
+    dims: [ -1 ]
+  },
+  {
+    name: "sequence_index"
+    data_type: TYPE_INT32
+    dims: [ 1 ]
+  }
+]
+
+instance_group [
+  {
+    count: 1
+    kind : KIND_CPU
+  }
+]
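The config declares two outputs while the test requests only text_output, so Triton itself must strip sequence_index from each decoupled response; that server-side pruning is what previously crashed. For comparison, a Python backend model can also observe the requested names itself via the request object's requested_output_names() API. A minimal illustrative sketch, not part of this commit:

    import numpy as np
    import triton_python_backend_utils as pb_utils


    class TritonPythonModel:
        """Hypothetical model, not this commit's: emit only requested outputs."""

        def execute(self, requests):
            responses = []
            for request in requests:
                # Names the client asked for, e.g. ["text_output"] in the test above.
                requested = request.requested_output_names()
                tensors = []
                if "text_output" in requested:
                    text = np.array([b"hello"], dtype=object)
                    tensors.append(pb_utils.Tensor("text_output", text))
                if "sequence_index" in requested:
                    seq = np.array([[0]], dtype=np.int32)
                    tensors.append(pb_utils.Tensor("sequence_index", seq))
                responses.append(pb_utils.InferenceResponse(output_tensors=tensors))
            return responses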
qa/L0_backend_python/io/requested_output_model/model.py

Lines changed: 110 additions & 0 deletions

@@ -0,0 +1,110 @@
+# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#
+# This test case was added based on a prior crash. DO NOT MODIFY!
+#
+
+import json
+import traceback
+
+import numpy as np
+import triton_python_backend_utils as pb_utils
+
+
+def get_valid_param_value(param, default_value=""):
+    value = param.get("string_value", "")
+    return default_value if value.startswith("${") or value == "" else value
+
+
+class TritonPythonModel:
+    def initialize(self, args):
+        model_config = json.loads(args["model_config"])
+        self.output_config = pb_utils.get_output_config_by_name(
+            model_config, "text_output"
+        )
+        self.output_dtype = pb_utils.triton_string_to_numpy(
+            self.output_config["data_type"]
+        )
+        self.decoupled = pb_utils.using_decoupled_model_transaction_policy(model_config)
+        self.logger = pb_utils.Logger
+
+    def create_triton_tensors(self, index):
+        x = "bla" + str(index)
+        output = [x.encode("utf8")]
+        np_output = np.array(output).astype(self.output_dtype)
+        seq_idx = np.array([[0]]).astype(np.int32)
+
+        t1 = pb_utils.Tensor("text_output", np_output)
+        t2 = pb_utils.Tensor("sequence_index", seq_idx)
+        tensors = [t1, t2]
+        return tensors
+
+    def create_triton_response(self, index):
+        tensors = self.create_triton_tensors(index)
+        return pb_utils.InferenceResponse(output_tensors=tensors)
+
+    def execute(self, requests):
+        responses = []
+        for request in requests:
+            if self.decoupled:
+                response_sender = request.get_response_sender()
+            try:
+                for index in range(0, 1):
+                    triton_response = self.create_triton_response(index)
+                    if self.decoupled:
+                        response_sender.send(triton_response)
+                    else:
+                        responses.append(triton_response)
+
+                if self.decoupled:
+                    response_sender.send(
+                        flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL
+                    )
+
+            except Exception:
+                self.logger.log_error(traceback.format_exc())
+                error_response = pb_utils.InferenceResponse(
+                    output_tensors=[],
+                    error=pb_utils.TritonError(traceback.format_exc()),
+                )
+
+                if self.decoupled:
+                    response_sender.send(error_response)
+                    response_sender.send(
+                        flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL
+                    )
+                else:
+                    responses.append(error_response)
+
+        if self.decoupled:
+            return None
+        else:
+            assert len(responses) == len(requests)
+            return responses
+
+    def finalize(self):
+        self.logger.log_info("Cleaning up...")
qa/L0_backend_python/io/test.sh

Lines changed: 26 additions & 0 deletions
@@ -176,6 +176,32 @@ done
 kill $SERVER_PID
 wait $SERVER_PID

+# IOTest.test_requested_output_decoupled_prior_crash
+rm -rf models && mkdir models
+mkdir -p models/llm/1/
+cp requested_output_model/config.pbtxt models/llm/
+cp requested_output_model/model.py models/llm/1/
+
+run_server
+if [ "$SERVER_PID" == "0" ]; then
+    echo -e "\n***\n*** Failed to start $SERVER\n***"
+    cat $SERVER_LOG
+    RET=1
+fi
+
+SUBTEST="test_requested_output_decoupled_prior_crash"
+set +e
+python3 -m pytest --junitxml=${SUBTEST}.report.xml ${UNITTEST_PY}::IOTest::${SUBTEST} > ${CLIENT_LOG}.${SUBTEST}
+if [ $? -ne 0 ]; then
+    echo -e "\n***\n*** IOTest.${SUBTEST} FAILED. \n***"
+    cat $CLIENT_LOG.${SUBTEST}
+    RET=1
+fi
+set -e
+
+kill $SERVER_PID
+wait $SERVER_PID
+
 if [ $RET -eq 0 ]; then
     echo -e "\n***\n*** IO test PASSED.\n***"
 else
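test.sh drives the subtest through the pytest CLI. To invoke the same case from Python instead, a hedged convenience, assuming io_test.py sits in the current directory and a server is already serving the llm model:

    # Hypothetical local runner mirroring the test.sh pytest invocation.
    import sys

    import pytest

    if __name__ == "__main__":
        sys.exit(
            pytest.main(
                ["io_test.py::IOTest::test_requested_output_decoupled_prior_crash"]
            )
        )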
