Skip to content

Commit 32fc89a

Browse files
npmilleraarongreig
andauthored
[UR][CUDA][HIP] Fix large kernel arg buffer overflow (#17538)
CUDA and HIP have a fixed maximum size for kernel arguments, this patch ensures that passing arguments large than this maximum size is handled properly by the adapters. This code really needs a refactoring to avoid duplication and allow larger argument sizes (supported by newer CUDA versions), but this will be in follow up work. This patch is a quick fix to avoid potential buffer overflows. --------- Co-authored-by: aarongreig <aaron.greig@codeplay.com>
1 parent 27aae7a commit 32fc89a

File tree

7 files changed

+76
-19
lines changed

7 files changed

+76
-19
lines changed

unified-runtime/source/adapters/cuda/kernel.cpp

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -362,24 +362,28 @@ urKernelSetArgPointer(ur_kernel_handle_t hKernel, uint32_t argIndex,
362362
const ur_kernel_arg_pointer_properties_t *pProperties,
363363
const void *pArgValue) {
364364
std::ignore = pProperties;
365-
// setKernelArg is expecting a pointer to our argument
366-
hKernel->setKernelArg(argIndex, sizeof(pArgValue), &pArgValue);
365+
try {
366+
// setKernelArg is expecting a pointer to our argument
367+
hKernel->setKernelArg(argIndex, sizeof(pArgValue), &pArgValue);
368+
} catch (ur_result_t Err) {
369+
return Err;
370+
}
367371
return UR_RESULT_SUCCESS;
368372
}
369373

370374
UR_APIEXPORT ur_result_t UR_APICALL
371375
urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex,
372376
const ur_kernel_arg_mem_obj_properties_t *Properties,
373377
ur_mem_handle_t hArgValue) {
374-
// Below sets kernel arg when zero-sized buffers are handled.
375-
// In such case the corresponding memory is null.
376-
if (hArgValue == nullptr) {
377-
hKernel->setKernelArg(argIndex, 0, nullptr);
378-
return UR_RESULT_SUCCESS;
379-
}
380-
381378
ur_result_t Result = UR_RESULT_SUCCESS;
382379
try {
380+
// Below sets kernel arg when zero-sized buffers are handled.
381+
// In such case the corresponding memory is null.
382+
if (hArgValue == nullptr) {
383+
hKernel->setKernelArg(argIndex, 0, nullptr);
384+
return UR_RESULT_SUCCESS;
385+
}
386+
383387
auto Device = hKernel->getProgram()->getDevice();
384388
ur_mem_flags_t MemAccess =
385389
Properties ? Properties->memoryAccess

unified-runtime/source/adapters/cuda/kernel.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,9 @@ struct ur_kernel_handle_t_ {
117117

118118
// Copy new argument to storage if it hasn't been added before.
119119
if (ParamSizes[Index] == 0) {
120+
if ((InsertPos + Size) > MaxParamBytes) {
121+
throw UR_RESULT_ERROR_OUT_OF_RESOURCES;
122+
}
120123
ParamSizes[Index] = Size;
121124
std::memcpy(&Storage[InsertPos], Arg, Size);
122125
ArgPointers[Index] = &Storage[InsertPos];

unified-runtime/source/adapters/hip/kernel.cpp

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -295,24 +295,28 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice,
295295
UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer(
296296
ur_kernel_handle_t hKernel, uint32_t argIndex,
297297
const ur_kernel_arg_pointer_properties_t *, const void *pArgValue) {
298-
// setKernelArg is expecting a pointer to our argument
299-
hKernel->setKernelArg(argIndex, sizeof(pArgValue), &pArgValue);
298+
try {
299+
// setKernelArg is expecting a pointer to our argument
300+
hKernel->setKernelArg(argIndex, sizeof(pArgValue), &pArgValue);
301+
} catch (ur_result_t Err) {
302+
return Err;
303+
}
300304
return UR_RESULT_SUCCESS;
301305
}
302306

303307
UR_APIEXPORT ur_result_t UR_APICALL
304308
urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex,
305309
const ur_kernel_arg_mem_obj_properties_t *Properties,
306310
ur_mem_handle_t hArgValue) {
307-
// Below sets kernel arg when zero-sized buffers are handled.
308-
// In such case the corresponding memory is null.
309-
if (hArgValue == nullptr) {
310-
hKernel->setKernelArg(argIndex, 0, nullptr);
311-
return UR_RESULT_SUCCESS;
312-
}
313-
314311
ur_result_t Result = UR_RESULT_SUCCESS;
315312
try {
313+
// Below sets kernel arg when zero-sized buffers are handled.
314+
// In such case the corresponding memory is null.
315+
if (hArgValue == nullptr) {
316+
hKernel->setKernelArg(argIndex, 0, nullptr);
317+
return UR_RESULT_SUCCESS;
318+
}
319+
316320
auto Device = hKernel->getProgram()->getDevice();
317321
hKernel->Args.addMemObjArg(argIndex, hArgValue,
318322
Properties ? Properties->memoryAccess : 0);

unified-runtime/source/adapters/hip/kernel.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,9 @@ struct ur_kernel_handle_t_ {
110110

111111
// Copy new argument to storage if it hasn't been added before.
112112
if (ParamSizes[Index] == 0) {
113+
if ((InsertPos + Size) > MAX_PARAM_BYTES) {
114+
throw UR_RESULT_ERROR_OUT_OF_RESOURCES;
115+
}
113116
ParamSizes[Index] = Size;
114117
std::memcpy(&Storage[InsertPos], Arg, Size);
115118
ArgPointers[Index] = &Storage[InsertPos];

unified-runtime/test/adapters/cuda/kernel_tests.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,27 @@ TEST_P(cudaKernelTest, URKernelArgumentSimple) {
160160
ASSERT_EQ(storedValue, number);
161161
}
162162

163+
TEST_P(cudaKernelTest, URKernelArgumentLarge) {
164+
uur::raii::Program program = nullptr;
165+
auto Length = std::strlen(ptxSource);
166+
ASSERT_SUCCESS(urProgramCreateWithBinary(context, 1, &device, &Length,
167+
(const uint8_t **)(&ptxSource),
168+
nullptr, program.ptr()));
169+
ASSERT_NE(program, nullptr);
170+
ASSERT_SUCCESS(urProgramBuild(context, program, nullptr));
171+
172+
uur::raii::Kernel kernel = nullptr;
173+
ASSERT_SUCCESS(urKernelCreate(program, "_Z8myKernelPi", kernel.ptr()));
174+
ASSERT_NE(kernel, nullptr);
175+
176+
// The CUDA adapter can't do proper argument validation so any kernel will
177+
// work for this test.
178+
std::array<uint8_t, 4004> data;
179+
data.fill(0);
180+
ASSERT_EQ_RESULT(urKernelSetArgValue(kernel, 0, 4004, nullptr, data.data()),
181+
UR_RESULT_ERROR_OUT_OF_RESOURCES);
182+
}
183+
163184
TEST_P(cudaKernelTest, URKernelArgumentSetTwice) {
164185
uur::raii::Program program = nullptr;
165186
auto Length = std::strlen(ptxSource);

unified-runtime/test/adapters/hip/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,15 @@
44
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
55

66
add_adapter_test(hip
7-
FIXTURE DEVICES
7+
FIXTURE KERNELS
88
SOURCES
99
fixtures.h
1010
urContextGetNativeHandle.cpp
1111
urDeviceGetNativeHandle.cpp
1212
urEventGetNativeHandle.cpp
1313
test_context.cpp
1414
test_event.cpp
15+
kernel_tests.cpp
1516
ENVIRONMENT
1617
"UR_ADAPTERS_FORCE_LOAD=\"$<TARGET_FILE:ur_adapter_hip>\""
1718
)
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// Copyright (C) 2025 Intel Corporation
2+
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
3+
// Exceptions. See LICENSE.TXT
4+
//
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
7+
#include "kernel.hpp"
8+
#include "uur/fixtures.h"
9+
#include "uur/raii.h"
10+
11+
using hipKernelTest = uur::urKernelExecutionTest;
12+
UUR_INSTANTIATE_DEVICE_TEST_SUITE(hipKernelTest);
13+
14+
TEST_P(hipKernelTest, URKernelArgumentLarge) {
15+
// The HIP adapter can't do proper argument validation so any kernel will
16+
// work for this test.
17+
std::array<uint8_t, 4004> data;
18+
data.fill(0);
19+
ASSERT_EQ_RESULT(urKernelSetArgValue(kernel, 0, 4004, nullptr, data.data()),
20+
UR_RESULT_ERROR_OUT_OF_RESOURCES);
21+
}

0 commit comments

Comments
 (0)