Skip to content

Commit 6c03694

Browse files
authored
[SYCL][CUDA][HIP] Add implementation of piextGetDeviceFunctionPointer (#4599)
This patch adds an implementation of `piextGetDeviceFunctionPointer` for CUDA and HIP. program_impl.cpp `has_kernel`now uses `piextGetDeviceFunctionPointer` to implement a more robust method for CUDA and HIP. The previous regex search used with `piPrgramGetInfo`, with `PI_PROGRAM_INFO_KERNEL_NAMES`, is removed and replaced with a die message as it does not work correctly with module-splitting per source. This patch allows CUDA to pass tests: DeviceCodeSplit/split-per-kernel.cpp DeviceCodeSplit/split-per-source-main.cpp KernelAndProgram/basic-program.cpp KernelAndProgram/kernel-and-program.cpp A follow up PR to llvm-test-suite will be made to enable these test for cuda. This PR comes from modifications requested in #4565
1 parent e5cc9b7 commit 6c03694

File tree

8 files changed

+240
-94
lines changed

8 files changed

+240
-94
lines changed

sycl/include/CL/sycl/detail/pi.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,10 @@ typedef enum {
112112
PI_INVALID_IMAGE_FORMAT_DESCRIPTOR = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR,
113113
PI_IMAGE_FORMAT_NOT_SUPPORTED = CL_IMAGE_FORMAT_NOT_SUPPORTED,
114114
PI_MEM_OBJECT_ALLOCATION_FAILURE = CL_MEM_OBJECT_ALLOCATION_FAILURE,
115+
PI_FUNCTION_ADDRESS_IS_NOT_AVAILABLE =
116+
-998, ///< PI_FUNCTION_ADDRESS_IS_NOT_AVAILABLE indicates a fallback
117+
///< method determines the function exists but its address cannot be
118+
///< found.
115119
PI_ERROR_UNKNOWN = -999
116120
} _pi_result;
117121

@@ -987,6 +991,9 @@ __SYCL_EXPORT pi_result piextDeviceSelectBinary(pi_device device,
987991
/// must present in the list of devices returned by \c get_device method for
988992
/// \arg \c program.
989993
///
994+
/// If a fallback method determines the function exists but the address is
995+
/// not available PI_FUNCTION_ADDRESS_IS_NOT_AVAILABLE is returned. If the
996+
/// address does not exist PI_INVALID_KERNEL_NAME is returned.
990997
__SYCL_EXPORT pi_result piextGetDeviceFunctionPointer(
991998
pi_device device, pi_program program, const char *function_name,
992999
pi_uint64 *function_pointer_ret);

sycl/plugins/cuda/pi_cuda.cpp

Lines changed: 23 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -575,20 +575,8 @@ pi_result _pi_program::build_program(const char *build_options) {
575575
/// Note: Another alternative is to add kernel names as metadata, like with
576576
/// reqd_work_group_size.
577577
std::string getKernelNames(pi_program program) {
578-
std::string source(program->binary_,
579-
program->binary_ + program->binarySizeInBytes_);
580-
std::regex entries_pattern(".entry\\s+([^\\([:s:]]*)");
581-
std::string names("");
582-
std::smatch match;
583-
bool first_match = true;
584-
while (std::regex_search(source, match, entries_pattern)) {
585-
assert(match.size() == 2);
586-
names += first_match ? "" : ";";
587-
names += match[1]; // Second element is the group.
588-
source = match.suffix().str();
589-
first_match = false;
590-
}
591-
return names;
578+
cl::sycl::detail::pi::die("getKernelNames not implemented");
579+
return {};
592580
}
593581

594582
/// RAII object that calls the reference count release function on the held PI
@@ -921,11 +909,27 @@ pi_result cuda_piextDeviceSelectBinary(pi_device device,
921909
return PI_INVALID_BINARY;
922910
}
923911

924-
pi_result cuda_piextGetDeviceFunctionPointer(pi_device, pi_program,
925-
const char *, pi_uint64 *) {
926-
cl::sycl::detail::pi::die(
927-
"cuda_piextGetDeviceFunctionPointer not implemented");
928-
return {};
912+
pi_result cuda_piextGetDeviceFunctionPointer(pi_device device,
913+
pi_program program,
914+
const char *func_name,
915+
pi_uint64 *func_pointer_ret) {
916+
// Check if device passed is the same the device bound to the context
917+
assert(device == program->get_context()->get_device());
918+
assert(func_pointer_ret != nullptr);
919+
920+
CUfunction func;
921+
CUresult ret = cuModuleGetFunction(&func, program->get(), func_name);
922+
*func_pointer_ret = reinterpret_cast<pi_uint64>(func);
923+
pi_result retError = PI_SUCCESS;
924+
925+
if (ret != CUDA_SUCCESS && ret != CUDA_ERROR_NOT_FOUND)
926+
retError = PI_CHECK_ERROR(ret);
927+
if (ret == CUDA_ERROR_NOT_FOUND) {
928+
*func_pointer_ret = 0;
929+
retError = PI_INVALID_KERNEL_NAME;
930+
}
931+
932+
return retError;
929933
}
930934

931935
/// \return PI_SUCCESS always since CUDA devices are always root devices.

sycl/plugins/hip/pi_hip.cpp

Lines changed: 20 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -567,20 +567,8 @@ pi_result _pi_program::build_program(const char *build_options) {
567567
/// has_kernel method, so an alternative would be to move the has_kernel
568568
/// query to PI and use hipModuleGetFunction to check for a kernel.
569569
std::string getKernelNames(pi_program program) {
570-
std::string source(program->binary_,
571-
program->binary_ + program->binarySizeInBytes_);
572-
std::regex entries_pattern(".entry\\s+([^\\([:s:]]*)");
573-
std::string names("");
574-
std::smatch match;
575-
bool first_match = true;
576-
while (std::regex_search(source, match, entries_pattern)) {
577-
assert(match.size() == 2);
578-
names += first_match ? "" : ";";
579-
names += match[1]; // Second element is the group.
580-
source = match.suffix().str();
581-
first_match = false;
582-
}
583-
return names;
570+
cl::sycl::detail::pi::die("getKernelNames not implemented");
571+
return {};
584572
}
585573

586574
/// RAII object that calls the reference count release function on the held PI
@@ -909,16 +897,25 @@ pi_result hip_piextDeviceSelectBinary(pi_device device,
909897

910898
pi_result hip_piextGetDeviceFunctionPointer(pi_device device,
911899
pi_program program,
912-
const char *function_name,
913-
pi_uint64 *function_pointer_ret) {
914-
(void)device;
915-
(void)program;
916-
(void)function_name;
917-
(void)function_pointer_ret;
900+
const char *func_name,
901+
pi_uint64 *func_pointer_ret) {
902+
// Check if device passed is the same the device bound to the context
903+
assert(device == program->get_context()->get_device());
904+
assert(func_pointer_ret != nullptr);
905+
906+
hipFunction_t func;
907+
hipError_t ret = hipModuleGetFunction(&func, program->get(), func_name);
908+
*func_pointer_ret = reinterpret_cast<pi_uint64>(func);
909+
pi_result retError = PI_SUCCESS;
918910

919-
cl::sycl::detail::pi::die(
920-
"hip_piextGetDeviceFunctionPointer not implemented");
921-
return {};
911+
if (ret != hipSuccess && ret != hipErrorNotFound)
912+
retError = PI_CHECK_ERROR(ret);
913+
if (ret == hipErrorNotFound) {
914+
*func_pointer_ret = 0;
915+
retError = PI_INVALID_KERNEL_NAME;
916+
}
917+
918+
return retError;
922919
}
923920

924921
/// \return PI_SUCCESS always since HIP devices are always root devices.

sycl/plugins/level_zero/pi_level_zero.cpp

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6248,6 +6248,32 @@ pi_result piEnqueueNativeKernel(pi_queue Queue, void (*UserFunc)(void *),
62486248
return {};
62496249
}
62506250

6251+
// Function gets characters between delimeter's in str
6252+
// then checks if they are equal to the sub_str.
6253+
// returns true if there is at least one instance
6254+
// returns false if there are no instances of the name
6255+
static bool is_in_separated_string(const std::string &str, char delimiter,
6256+
const std::string &sub_str) {
6257+
size_t beg = 0;
6258+
size_t length = 0;
6259+
for (const auto &x : str) {
6260+
if (x == delimiter) {
6261+
if (str.substr(beg, length) == sub_str)
6262+
return true;
6263+
6264+
beg += length + 1;
6265+
length = 0;
6266+
continue;
6267+
}
6268+
length++;
6269+
}
6270+
if (length != 0)
6271+
if (str.substr(beg, length) == sub_str)
6272+
return true;
6273+
6274+
return false;
6275+
}
6276+
62516277
// TODO: Check if the function_pointer_ret type can be converted to void**.
62526278
pi_result piextGetDeviceFunctionPointer(pi_device Device, pi_program Program,
62536279
const char *FunctionName,
@@ -6272,6 +6298,40 @@ pi_result piextGetDeviceFunctionPointer(pi_device Device, pi_program Program,
62726298
ModIt++;
62736299
}
62746300

6301+
// zeModuleGetFunctionPointer currently fails for all
6302+
// kernels regardless of if the kernel exist or not
6303+
// with ZE_RESULT_ERROR_INVALID_ARGUMENT
6304+
// TODO: remove when this is no longer the case
6305+
// If zeModuleGetFunctionPointer returns invalid argument,
6306+
// fallback to searching through kernel list and return
6307+
// PI_FUNCTION_ADDRESS_IS_NOT_AVAILABLE if the function exists
6308+
// or PI_INVALID_KERNEL_NAME if the function does not exist.
6309+
// FunctionPointerRet should always be 0
6310+
if (ZeResult == ZE_RESULT_ERROR_INVALID_ARGUMENT) {
6311+
size_t Size;
6312+
*FunctionPointerRet = 0;
6313+
PI_CALL(piProgramGetInfo(Program, PI_PROGRAM_INFO_KERNEL_NAMES, 0, nullptr,
6314+
&Size));
6315+
6316+
std::string ClResult(Size, ' ');
6317+
PI_CALL(piProgramGetInfo(Program, PI_PROGRAM_INFO_KERNEL_NAMES,
6318+
ClResult.size(), &ClResult[0], nullptr));
6319+
6320+
// Get rid of the null terminator and search for kernel_name
6321+
// If function can be found return error code to indicate it
6322+
// exists
6323+
ClResult.pop_back();
6324+
if (is_in_separated_string(ClResult, ';', std::string(FunctionName)))
6325+
return PI_FUNCTION_ADDRESS_IS_NOT_AVAILABLE;
6326+
6327+
return PI_INVALID_KERNEL_NAME;
6328+
}
6329+
6330+
if (ZeResult == ZE_RESULT_ERROR_INVALID_FUNCTION_NAME) {
6331+
*FunctionPointerRet = 0;
6332+
return PI_INVALID_KERNEL_NAME;
6333+
}
6334+
62756335
return mapError(ZeResult);
62766336
}
62776337

sycl/plugins/opencl/pi_opencl.cpp

Lines changed: 91 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ CONSTFIX char clEnqueueMemcpyName[] = "clEnqueueMemcpyINTEL";
6565
CONSTFIX char clGetMemAllocInfoName[] = "clGetMemAllocInfoINTEL";
6666
CONSTFIX char clSetProgramSpecializationConstantName[] =
6767
"clSetProgramSpecializationConstant";
68+
CONSTFIX char clGetDeviceFunctionPointerName[] =
69+
"clGetDeviceFunctionPointerINTEL";
6870

6971
#undef CONSTFIX
7072

@@ -77,8 +79,10 @@ static pi_result getExtFuncFromContext(pi_context context, T *fptr) {
7779

7880
// if cached, return cached FuncPtr
7981
if (auto F = FuncPtrs[context]) {
82+
// if cached that extension is not available return nullptr and
83+
// PI_INVALID_VALUE
8084
*fptr = F;
81-
return PI_SUCCESS;
85+
return F ? PI_SUCCESS : PI_INVALID_VALUE;
8286
}
8387

8488
cl_uint deviceCount;
@@ -110,8 +114,11 @@ static pi_result getExtFuncFromContext(pi_context context, T *fptr) {
110114
T FuncPtr =
111115
(T)clGetExtensionFunctionAddressForPlatform(curPlatform, FuncName);
112116

113-
if (!FuncPtr)
117+
if (!FuncPtr) {
118+
// Cache that the extension is not available
119+
FuncPtrs[context] = nullptr;
114120
return PI_INVALID_VALUE;
121+
}
115122

116123
*fptr = FuncPtr;
117124
FuncPtrs[context] = FuncPtr;
@@ -504,41 +511,97 @@ pi_result piextKernelCreateWithNativeHandle(pi_native_handle nativeHandle,
504511
return PI_SUCCESS;
505512
}
506513

514+
// Function gets characters between delimeter's in str
515+
// then checks if they are equal to the sub_str.
516+
// returns true if there is at least one instance
517+
// returns false if there are no instances of the name
518+
static bool is_in_separated_string(const std::string &str, char delimiter,
519+
const std::string &sub_str) {
520+
size_t beg = 0;
521+
size_t length = 0;
522+
for (const auto &x : str) {
523+
if (x == delimiter) {
524+
if (str.substr(beg, length) == sub_str)
525+
return true;
526+
527+
beg += length + 1;
528+
length = 0;
529+
continue;
530+
}
531+
length++;
532+
}
533+
if (length != 0)
534+
if (str.substr(beg, length) == sub_str)
535+
return true;
536+
537+
return false;
538+
}
539+
540+
typedef CL_API_ENTRY cl_int(CL_API_CALL *clGetDeviceFunctionPointer_fn)(
541+
cl_device_id device, cl_program program, const char *FuncName,
542+
cl_ulong *ret_ptr);
507543
pi_result piextGetDeviceFunctionPointer(pi_device device, pi_program program,
508544
const char *func_name,
509545
pi_uint64 *function_pointer_ret) {
510-
pi_platform platform;
546+
547+
cl_context CLContext = nullptr;
511548
cl_int ret_err =
512-
clGetDeviceInfo(cast<cl_device_id>(device), PI_DEVICE_INFO_PLATFORM,
513-
sizeof(platform), &platform, nullptr);
549+
clGetProgramInfo(cast<cl_program>(program), CL_PROGRAM_CONTEXT,
550+
sizeof(CLContext), &CLContext, nullptr);
514551

515-
if (ret_err != CL_SUCCESS) {
552+
if (ret_err != CL_SUCCESS)
516553
return cast<pi_result>(ret_err);
517-
}
518554

519-
using FuncT =
520-
cl_int(CL_API_CALL *)(cl_device_id, cl_program, const char *, cl_ulong *);
521-
522-
// TODO: add check that device supports corresponding extension
523-
FuncT func_ptr =
524-
reinterpret_cast<FuncT>(clGetExtensionFunctionAddressForPlatform(
525-
cast<cl_platform_id>(platform), "clGetDeviceFunctionPointerINTEL"));
526-
// TODO: once we have check that device supports corresponding extension,
527-
// we can insert an assertion that func_ptr is not nullptr. For now, let's
528-
// just return an error if failed to query such function
529-
// assert(
530-
// func_ptr != nullptr &&
531-
// "Failed to get address of clGetDeviceFunctionPointerINTEL function");
532-
533-
if (!func_ptr) {
534-
if (function_pointer_ret)
555+
clGetDeviceFunctionPointer_fn FuncT = nullptr;
556+
ret_err = getExtFuncFromContext<clGetDeviceFunctionPointerName,
557+
clGetDeviceFunctionPointer_fn>(
558+
cast<pi_context>(CLContext), &FuncT);
559+
560+
pi_result pi_ret_err = PI_SUCCESS;
561+
562+
// Check if kernel name exists, to prevent opencl runtime throwing exception
563+
// with cpu runtime
564+
// TODO: Use fallback search method if extension does not exist once CPU
565+
// runtime no longer throws exceptions and prints messages when given
566+
// unavailable functions.
567+
*function_pointer_ret = 0;
568+
size_t Size;
569+
cl_int Res =
570+
clGetProgramInfo(cast<cl_program>(program), PI_PROGRAM_INFO_KERNEL_NAMES,
571+
0, nullptr, &Size);
572+
if (Res != CL_SUCCESS)
573+
return cast<pi_result>(Res);
574+
575+
std::string ClResult(Size, ' ');
576+
ret_err =
577+
clGetProgramInfo(cast<cl_program>(program), PI_PROGRAM_INFO_KERNEL_NAMES,
578+
ClResult.size(), &ClResult[0], nullptr);
579+
if (Res != CL_SUCCESS)
580+
return cast<pi_result>(Res);
581+
582+
// Get rid of the null terminator and search for kernel_name
583+
// If function cannot be found return error code to indicate it
584+
// exists
585+
ClResult.pop_back();
586+
if (!is_in_separated_string(ClResult, ';', func_name))
587+
return PI_INVALID_KERNEL_NAME;
588+
589+
pi_ret_err = PI_FUNCTION_ADDRESS_IS_NOT_AVAILABLE;
590+
591+
// If clGetDeviceFunctionPointer is in list of extensions
592+
if (FuncT) {
593+
pi_ret_err = cast<pi_result>(FuncT(cast<cl_device_id>(device),
594+
cast<cl_program>(program), func_name,
595+
function_pointer_ret));
596+
// GPU runtime sometimes returns PI_INVALID_ARG_VALUE if func address cannot
597+
// be found even if kernel exits. As the kernel does exist return that the
598+
// address is not available
599+
if (pi_ret_err == CL_INVALID_ARG_VALUE) {
535600
*function_pointer_ret = 0;
536-
return PI_INVALID_DEVICE;
601+
return PI_FUNCTION_ADDRESS_IS_NOT_AVAILABLE;
602+
}
537603
}
538-
539-
return cast<pi_result>(func_ptr(cast<cl_device_id>(device),
540-
cast<cl_program>(program), func_name,
541-
function_pointer_ret));
604+
return pi_ret_err;
542605
}
543606

544607
pi_result piContextCreate(const pi_context_properties *properties,

sycl/source/detail/common.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,8 @@ const char *stringifyErrorCode(cl_int error) {
216216
case CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR:
217217
return "CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR";
218218
*/
219+
case PI_FUNCTION_ADDRESS_IS_NOT_AVAILABLE:
220+
return "Function exists but address is not available";
219221
default:
220222
return "Unknown OpenCL error code";
221223
}

0 commit comments

Comments
 (0)