Skip to content

Commit 088d9a9

Browse files
authored
fix: Fix L0_perf_nomodel shared memory (#7709)
1 parent fa0008b commit 088d9a9

File tree

2 files changed

+19
-7
lines changed

2 files changed

+19
-7
lines changed

qa/L0_perf_nomodel/run_test.sh

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#!/bin/bash
2-
# Copyright 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
33
#
44
# Redistribution and use in source and binary forms, with or without
55
# modification, are permitted provided that the following conditions
@@ -38,6 +38,7 @@ PERF_CLIENT_PERCENTILE=${PERF_CLIENT_PERCENTILE:=95}
3838
PERF_CLIENT_STABILIZE_WINDOW=${PERF_CLIENT_STABILIZE_WINDOW:=5000}
3939
PERF_CLIENT_STABILIZE_THRESHOLD=${PERF_CLIENT_STABILIZE_THRESHOLD:=5}
4040
TENSOR_SIZE=${TENSOR_SIZE:=1}
41+
TENSOR_ELEMENT_BYTES=${TENSOR_ELEMENT_BYTES:=4}
4142
SHARED_MEMORY=${SHARED_MEMORY:="none"}
4243
REPORTER=../common/reporter.py
4344

@@ -126,6 +127,16 @@ for BACKEND in $BACKENDS; do
126127
fi
127128
fi
128129

130+
# Set the shared memory output size.
#
# Builds the "--output-shared-memory-size <bytes>" option that is appended to
# the perf client command line further down in this script. The size is
# TENSOR_ELEMENT_BYTES * TENSOR_SIZE, multiplied by MAX_BATCH when MAX_BATCH
# is greater than 1. When SHARED_MEMORY is "none" the variable stays empty so
# no option is added.
OUTPUT_SHARED_MEMORY_SIZE=""
if [[ "$SHARED_MEMORY" != "none" ]]; then
    OUTPUT_SHARED_MEMORY_SIZE=$((TENSOR_ELEMENT_BYTES*TENSOR_SIZE))
    # Use an arithmetic test here: inside [ ... ] a bare ">" is an output
    # redirection, which creates a stray file named "1" and only checks that
    # MAX_BATCH is non-empty (so MAX_BATCH=0 would zero out the size).
    if (( MAX_BATCH > 1 )); then
        OUTPUT_SHARED_MEMORY_SIZE=$((OUTPUT_SHARED_MEMORY_SIZE*MAX_BATCH))
    fi
    OUTPUT_SHARED_MEMORY_SIZE="--output-shared-memory-size $OUTPUT_SHARED_MEMORY_SIZE"
fi
139+
129140
if [ $DYNAMIC_BATCH > 1 ]; then
130141
NAME=${BACKEND}_sbatch${STATIC_BATCH}_dbatch${DYNAMIC_BATCH}_instance${INSTANCE_CNT}
131142
else
@@ -189,6 +200,7 @@ for BACKEND in $BACKENDS; do
189200
-p${PERF_CLIENT_STABILIZE_WINDOW} \
190201
-s${PERF_CLIENT_STABILIZE_THRESHOLD} \
191202
${PERF_CLIENT_EXTRA_ARGS} \
203+
${OUTPUT_SHARED_MEMORY_SIZE} \
192204
-m ${MODEL_NAME} \
193205
-b${STATIC_BATCH} -t${CONCURRENCY} \
194206
--max-trials "${PA_MAX_TRIALS}" \

qa/L0_perf_nomodel/test.sh

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#!/bin/bash
2-
# Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
33
#
44
# Redistribution and use in source and binary forms, with or without
55
# modification, are permitted provided that the following conditions
@@ -72,14 +72,14 @@ RUNTEST=./run_test.sh
7272
# by 4.
7373
TENSOR_SIZE_16MB=$((4*1024*1024))
7474

75-
if [ "$BENCHMARK_TEST_SHARED_MEMORY" == "system" ]; then
75+
if [ "$TEST_SHARED_MEMORY" == "system" ]; then
7676
UNDERTEST_NAME="$UNDERTEST_NAME System Shared Memory";
7777
SUFFIX="_shm"
78-
elif [ "$BENCHMARK_TEST_SHARED_MEMORY" == "cuda" ]; then
78+
elif [ "$TEST_SHARED_MEMORY" == "cuda" ]; then
7979
UNDERTEST_NAME="$UNDERTEST_NAME CUDA Shared Memory";
8080
SUFFIX="_cudashm"
8181
else
82-
BENCHMARK_TEST_SHARED_MEMORY="none"
82+
TEST_SHARED_MEMORY="none"
8383
TEST_NAMES=(
8484
"${UNDERTEST_NAME} Minimum Latency GRPC"
8585
"${UNDERTEST_NAME} Minimum Latency HTTP"
@@ -188,7 +188,7 @@ for idx in "${!TEST_NAMES[@]}"; do
188188
TEST_CONCURRENCY=${TEST_CONCURRENCY[$idx]}
189189

190190
# FIXME: If PA C API adds SHMEM support, remove this.
191-
if [[ "${BENCHMARK_TEST_SHARED_MEMORY}" != "none" ]] && \
191+
if [[ "${TEST_SHARED_MEMORY}" != "none" ]] && \
192192
[[ "${TEST_PROTOCOL}" == "triton_c_api" ]]; then
193193
echo "WARNING: Perf Analyzer does not support shared memory I/O when benchmarking directly with Triton C API, skipping."
194194
continue
@@ -202,7 +202,7 @@ for idx in "${!TEST_NAMES[@]}"; do
202202
PERF_CLIENT_PROTOCOL=${TEST_PROTOCOL} \
203203
TENSOR_SIZE=${TEST_TENSOR_SIZE} \
204204
BACKENDS=${TEST_BACKENDS} \
205-
SHARED_MEMORY=${BENCHMARK_TEST_SHARED_MEMORY} \
205+
SHARED_MEMORY=${TEST_SHARED_MEMORY} \
206206
STATIC_BATCH_SIZES=1 \
207207
DYNAMIC_BATCH_SIZES=1 \
208208
INSTANCE_COUNTS=${TEST_INSTANCE_COUNT} \

0 commit comments

Comments
 (0)