Skip to content

Commit b09aeb6

Browse files
committed
Add generated kernel name header instead of relying on kernel name query.
1 parent 62fb6f6 commit b09aeb6

17 files changed

+269
-100
lines changed

scripts/generate_kernel_header.py

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
"""
2+
Copyright (C) 2022 Intel Corporation
3+
4+
Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
5+
See LICENSE.TXT
6+
SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
8+
"""
9+
import argparse
10+
import os
11+
import re
12+
import subprocess
13+
import sys
14+
15+
from mako.template import Template
16+
17+
HEADER_TEMPLATE = Template("""/*
18+
*
19+
* Copyright (C) 2023 Intel Corporation
20+
*
21+
* Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
22+
* See LICENSE.TXT
23+
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
24+
*
25+
* @file ${file_name}.h
26+
*
27+
*/
28+
29+
#include <map>
30+
#include <string>
31+
#include <vector>
32+
33+
namespace uur {
34+
namespace device_binaries {
35+
std::map<std::string, std::vector<std::string>> program_kernel_map = {
36+
% for program, entry_points in kernel_name_dict.items():
37+
{"${program}", {
38+
% for entry_point in entry_points:
39+
"${entry_point}",
40+
% endfor
41+
}},
42+
% endfor
43+
};
44+
}
45+
}
46+
""")
47+
48+
49+
def generate_header(output_file, kernel_name_dict):
50+
"""Render the template and write it to the output file."""
51+
file_name = os.path.basename(output_file)
52+
rendered = HEADER_TEMPLATE.render(file_name=file_name,
53+
kernel_name_dict=kernel_name_dict)
54+
rendered = re.sub(r"\r\n", r"\n", rendered)
55+
56+
with open(output_file, "w") as fout:
57+
fout.write(rendered)
58+
59+
60+
def get_mangled_names(dpcxx_path, source_file, output_header):
61+
"""Return a list of all the entry point names from a given sycl source file.
62+
63+
Filters out wrapper and offset handler entry points.
64+
"""
65+
output_dir = os.path.dirname(output_header)
66+
il_file = os.path.join(output_dir, os.path.basename(source_file) + ".ll")
67+
generate_il_command = f"""\
68+
{dpcxx_path} -S -fsycl -fsycl-device-code-split=off \
69+
-fsycl-device-only -o {il_file} {source_file}"""
70+
subprocess.run(generate_il_command, shell=True)
71+
kernel_line_regex = re.compile("define.*spir_kernel")
72+
definition_lines = []
73+
with open(il_file) as f:
74+
lines = f.readlines()
75+
for line in lines:
76+
if kernel_line_regex.search(line) is not None:
77+
definition_lines.append(line)
78+
79+
entry_point_names = []
80+
kernel_name_regex = re.compile(r"@(.*?)\(")
81+
for line in definition_lines:
82+
if kernel_name_regex.search(line) is None:
83+
continue
84+
kernel_name = kernel_name_regex.search(line).group(1)
85+
if "kernel_wrapper" not in kernel_name and "with_offset" not in kernel_name:
86+
entry_point_names.append(kernel_name)
87+
88+
os.remove(il_file)
89+
return entry_point_names
90+
91+
92+
def main():
93+
parser = argparse.ArgumentParser()
94+
parser.add_argument("--dpcxx_path",
95+
help="Full path to dpc++ compiler executable.")
96+
parser.add_argument(
97+
"-o",
98+
"--output",
99+
help="Full path to header file that will be generated.")
100+
parser.add_argument("source_files", nargs="+")
101+
args = parser.parse_args()
102+
103+
mangled_names = {}
104+
105+
for source_file in args.source_files:
106+
program_name = os.path.splitext(os.path.basename(source_file))[0]
107+
mangled_names[program_name] = get_mangled_names(
108+
args.dpcxx_path, source_file, args.output)
109+
generate_header(args.output, mangled_names)
110+
111+
112+
if __name__ == "__main__":
113+
sys.exit(main())

test/conformance/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ function(add_conformance_test_with_kernels_environment name)
2828
add_conformance_test(${name} ${ARGN})
2929
target_compile_definitions("test-${name}" PRIVATE KERNELS_ENVIRONMENT PRIVATE
3030
KERNELS_DEFAULT_DIR="${UR_CONFORMANCE_DEVICE_BINARIES_DIR}")
31+
target_include_directories("test-${name}" PRIVATE ${UR_CONFORMANCE_DEVICE_BINARIES_DIR})
3132
add_dependencies("test-${name}" generate_device_binaries)
3233
endfunction()
3334

test/conformance/device_code/CMakeLists.txt

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,29 +3,42 @@
33
# See LICENSE.TXT
44
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
55

6-
function(add_device_binary SOURCE_FILE)
6+
macro(add_device_binary SOURCE_FILE)
77
get_filename_component(KERNEL_NAME ${SOURCE_FILE} NAME_WE)
88
set(DEVICE_BINARY_DIR "${UR_CONFORMANCE_DEVICE_BINARIES_DIR}/${KERNEL_NAME}")
99
file(MAKE_DIRECTORY ${DEVICE_BINARY_DIR})
1010
foreach(TRIPLE ${TARGET_TRIPLES})
11-
add_custom_target(${KERNEL_NAME}_${TRIPLE}_device_binary
12-
${UR_DPCXX} -fsycl -fsycl-targets=${TRIPLE} -fsycl-device-code-split=off
13-
${SOURCE_FILE} -o "${DEVICE_BINARY_DIR}/${KERNEL_NAME}_${TRIPLE}"
11+
set(EXE_PATH "${DEVICE_BINARY_DIR}/${KERNEL_NAME}_${TRIPLE}")
12+
add_custom_command(OUTPUT ${EXE_PATH}
13+
COMMAND ${UR_DPCXX} -fsycl -fsycl-targets=${TRIPLE} -fsycl-device-code-split=off
14+
${SOURCE_FILE} -o ${EXE_PATH}
1415
COMMAND ${CMAKE_COMMAND} -E env SYCL_DUMP_IMAGES=true
15-
"${DEVICE_BINARY_DIR}/${KERNEL_NAME}_${TRIPLE}" || (exit 0)
16+
${EXE_PATH} || (exit 0)
1617
WORKING_DIRECTORY "${DEVICE_BINARY_DIR}"
17-
COMMAND ${CMAKE_COMMAND} -E remove
18-
"${DEVICE_BINARY_DIR}/${KERNEL_NAME}_${TRIPLE}"
18+
DEPENDS ${SOURCE_FILE}
1919
)
20-
add_dependencies(generate_device_binaries ${KERNEL_NAME}_${TRIPLE}_device_binary)
20+
add_custom_target(generate_${KERNEL_NAME}_${TRIPLE} DEPENDS ${EXE_PATH})
21+
add_dependencies(generate_device_binaries generate_${KERNEL_NAME}_${TRIPLE})
2122
endforeach()
22-
endfunction()
23+
list(APPEND DEVICE_CODE_SOURCES ${SOURCE_FILE})
24+
endmacro()
2325

2426
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/bar.cpp)
2527
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/fill.cpp)
2628
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/fill_2d.cpp)
2729
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/fill_3d.cpp)
30+
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/fill_usm.cpp)
2831
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/foo.cpp)
2932
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/image_copy.cpp)
3033
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/mean.cpp)
3134
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/spec_constant.cpp)
35+
36+
set(KERNEL_HEADER ${UR_CONFORMANCE_DEVICE_BINARIES_DIR}/kernel_entry_points.h)
37+
add_custom_command(OUTPUT ${KERNEL_HEADER}
38+
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/scripts
39+
COMMAND ${Python3_EXECUTABLE} generate_kernel_header.py
40+
--dpcxx_path ${UR_DPCXX} -o ${KERNEL_HEADER} ${DEVICE_CODE_SOURCES}
41+
DEPENDS ${PROJECT_SOURCE_DIR}/scripts/generate_kernel_header.py
42+
${DEVICE_CODE_SOURCES})
43+
add_custom_target(kernel_names_header DEPENDS ${KERNEL_HEADER})
44+
add_dependencies(generate_device_binaries kernel_names_header)

test/conformance/device_code/fill_2d.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
// Copyright (C) 2023 Intel Corporation
2-
// SPDX-License-Identifier: MIT
2+
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
3+
// See LICENSE.TXT
4+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
35

46
#include <CL/sycl.hpp>
57

test/conformance/device_code/fill_3d.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
// Copyright (C) 2023 Intel Corporation
2-
// SPDX-License-Identifier: MIT
2+
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
3+
// See LICENSE.TXT
4+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
35

46
#include <CL/sycl.hpp>
57

@@ -13,7 +15,8 @@ int main() {
1315
uint32_t val = 42;
1416
cl::sycl::queue sycl_queue;
1517

16-
auto work_range = cl::sycl::nd_range<3>(nd_range, cl::sycl::range<3>(1, 1, 1));
18+
auto work_range =
19+
cl::sycl::nd_range<3>(nd_range, cl::sycl::range<3>(1, 1, 1));
1720
auto A_buff = cl::sycl::buffer<uint32_t>(
1821
A.data(), cl::sycl::range<1>(nd_range_x * nd_range_y));
1922
sycl_queue.submit([&](cl::sycl::handler &cgh) {
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
// Copyright (C) 2023 Intel Corporation
2+
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
3+
// See LICENSE.TXT
4+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5+
6+
#include <CL/sycl.hpp>
7+
8+
int main() {
9+
size_t array_size = 16;
10+
std::vector<uint32_t> A(array_size, 1);
11+
uint32_t val = 42;
12+
cl::sycl::queue sycl_queue;
13+
uint32_t *data = cl::sycl::malloc_shared<uint32_t>(array_size, sycl_queue);
14+
sycl_queue.submit([&](cl::sycl::handler &cgh) {
15+
cgh.parallel_for<class fill_usm>(cl::sycl::range<1>{array_size},
16+
[data, val](cl::sycl::item<1> itemId) {
17+
auto id = itemId.get_id(0);
18+
data[id] = val;
19+
});
20+
});
21+
return 0;
22+
}

test/conformance/enqueue/urEnqueueKernelLaunch.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@ TEST_P(urEnqueueKernelLaunchTest, InvalidNullPtrEventWaitList) {
4949
nullptr, 1, nullptr, nullptr),
5050
UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
5151

52-
// does this make sense??
5352
ur_event_handle_t validEvent;
5453
ASSERT_SUCCESS(urEnqueueEventsWait(queue, 0, nullptr, &validEvent));
5554

@@ -90,8 +89,8 @@ TEST_P(urEnqueueKernelLaunch2DTest, Success) {
9089
AddBuffer1DArg(buffer_size, &buffer);
9190
AddPodArg(val);
9291
ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, n_dimensions,
93-
global_offset, global_size, nullptr,
94-
0, nullptr, nullptr));
92+
global_offset, global_size, nullptr, 0,
93+
nullptr, nullptr));
9594
ASSERT_SUCCESS(urQueueFinish(queue));
9695
ValidateBuffer(buffer, buffer_size, val);
9796
}
@@ -105,7 +104,8 @@ struct urEnqueueKernelLaunch3DTest : uur::urKernelExecutionTest {
105104
uint32_t val = 42;
106105
size_t global_size[3] = {4, 4, 4};
107106
size_t global_offset[3] = {0, 0, 0};
108-
size_t buffer_size = sizeof(val) * global_size[0] * global_size[1] * global_size[2];
107+
size_t buffer_size =
108+
sizeof(val) * global_size[0] * global_size[1] * global_size[2];
109109
size_t n_dimensions = 3;
110110
};
111111
UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEnqueueKernelLaunch3DTest);
@@ -115,8 +115,8 @@ TEST_P(urEnqueueKernelLaunch3DTest, Success) {
115115
AddBuffer1DArg(buffer_size, &buffer);
116116
AddPodArg(val);
117117
ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, n_dimensions,
118-
global_offset, global_size, nullptr,
119-
0, nullptr, nullptr));
118+
global_offset, global_size, nullptr, 0,
119+
nullptr, nullptr));
120120
ASSERT_SUCCESS(urQueueFinish(queue));
121121
ValidateBuffer(buffer, buffer_size, val);
122122
}

test/conformance/kernel/urKernelCreate.cpp

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,10 @@ struct urKernelCreateTest : uur::urProgramTest {
99
void SetUp() override {
1010
UUR_RETURN_ON_FATAL_FAILURE(urProgramTest::SetUp());
1111
ASSERT_SUCCESS(urProgramBuild(context, program, nullptr));
12-
size_t kernel_string_size = 0;
13-
ASSERT_SUCCESS(urProgramGetInfo(program, UR_PROGRAM_INFO_KERNEL_NAMES,
14-
0, nullptr, &kernel_string_size));
15-
std::string kernel_string;
16-
kernel_string.resize(kernel_string_size);
17-
ASSERT_SUCCESS(urProgramGetInfo(program, UR_PROGRAM_INFO_KERNEL_NAMES,
18-
kernel_string.size(),
19-
kernel_string.data(), nullptr));
20-
kernel_name = kernel_string.substr(0, kernel_string.find(";"));
12+
auto kernel_names =
13+
uur::KernelsEnvironment::instance->GetEntryPointNames(
14+
this->program_name);
15+
kernel_name = kernel_names[0];
2116
}
2217

2318
void TearDown() override {

test/conformance/kernel/urKernelGetInfo.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,11 +58,11 @@ TEST_P(urKernelGetInfoTest, InvalidNullPointerPropValue) {
5858
size_t n_args = 0;
5959
ASSERT_EQ_RESULT(urKernelGetInfo(kernel, UR_KERNEL_INFO_NUM_ARGS,
6060
sizeof(n_args), nullptr, nullptr),
61-
UR_RESULT_ERROR_INVALID_SIZE);
61+
UR_RESULT_ERROR_INVALID_NULL_POINTER);
6262
}
6363

6464
TEST_P(urKernelGetInfoTest, InvalidNullPointerPropSizeRet) {
6565
ASSERT_EQ_RESULT(
6666
urKernelGetInfo(kernel, UR_KERNEL_INFO_NUM_ARGS, 0, nullptr, nullptr),
67-
UR_RESULT_ERROR_INVALID_SIZE);
67+
UR_RESULT_ERROR_INVALID_NULL_POINTER);
6868
}

0 commit comments

Comments
 (0)