Skip to content

Commit 83d0e30

Browse files
authored
fix: L0_sequence_batcher_cudashm (#7852)
1 parent 71ca0c5 commit 83d0e30

File tree

2 files changed

+21
-9
lines changed

2 files changed

+21
-9
lines changed

src/test/sequence/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+
# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
#
33
# Redistribution and use in source and binary forms, with or without
44
# modification, are permitted provided that the following conditions
@@ -43,7 +43,7 @@ add_library(
4343
TritonSequenceBackend::triton-sequence-backend ALIAS triton-sequence-backend
4444
)
4545

46-
target_compile_features(triton-sequence-backend PRIVATE cxx_std_11)
46+
target_compile_features(triton-sequence-backend PRIVATE cxx_std_17)
4747
target_compile_options(
4848
triton-sequence-backend PRIVATE
4949
$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:

src/test/sequence/src/sequence.cc

Lines changed: 19 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
1+
// Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved.
22
//
33
// Redistribution and use in source and binary forms, with or without
44
// modification, are permitted provided that the following conditions
@@ -847,9 +847,15 @@ TRITONBACKEND_ModelInstanceExecute(
847847
if (input_memory_type == TRITONSERVER_MEMORY_GPU) {
848848
ipbuffer_vec.resize(input_element_cnt);
849849
ipbuffer_int = ipbuffer_vec.data();
850-
cudaMemcpy(
851-
const_cast<int32_t*>(ipbuffer_int), input_buffer, input_byte_size,
852-
cudaMemcpyDeviceToHost);
850+
LOG_IF_CUDA_ERROR(
851+
cudaMemcpyAsync(
852+
const_cast<int32_t*>(ipbuffer_int), input_buffer, input_byte_size,
853+
cudaMemcpyDeviceToHost, instance_state->CudaStream()),
854+
"failed to copy buffer from Device to Host");
855+
856+
LOG_IF_CUDA_ERROR(
857+
cudaStreamSynchronize(instance_state->CudaStream()),
858+
"failed to perform synchronization on cuda stream");
853859
} else {
854860
ipbuffer_int = reinterpret_cast<const int32_t*>(input_buffer);
855861
}
@@ -939,9 +945,15 @@ TRITONBACKEND_ModelInstanceExecute(
939945
}
940946

941947
if (output_memory_type == TRITONSERVER_MEMORY_GPU) {
942-
cudaMemcpy(
943-
output_buffer, const_cast<int32_t*>(obuffer_int),
944-
buffer_byte_size, cudaMemcpyHostToDevice);
948+
LOG_IF_CUDA_ERROR(
949+
cudaMemcpyAsync(
950+
output_buffer, const_cast<int32_t*>(obuffer_int),
951+
buffer_byte_size, cudaMemcpyHostToDevice,
952+
instance_state->CudaStream()),
953+
"failed to copy buffer from Device to Host");
954+
LOG_IF_CUDA_ERROR(
955+
cudaStreamSynchronize(instance_state->CudaStream()),
956+
"failed to perform synchronization on cuda stream");
945957
}
946958
}
947959
}

0 commit comments

Comments
 (0)