Skip to content

Commit 218db5d

Browse files
authored
Merge pull request #553 from aarongreig/aaron/kernelExecutionTests
Add kernel execution tests.
2 parents f85ee5b + b09aeb6 commit 218db5d

20 files changed

+539
-105
lines changed

scripts/generate_kernel_header.py

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
"""
2+
Copyright (C) 2022 Intel Corporation
3+
4+
Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
5+
See LICENSE.TXT
6+
SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
8+
"""
9+
import argparse
10+
import os
11+
import re
12+
import subprocess
13+
import sys
14+
15+
from mako.template import Template
16+
17+
HEADER_TEMPLATE = Template("""/*
18+
*
19+
* Copyright (C) 2023 Intel Corporation
20+
*
21+
* Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
22+
* See LICENSE.TXT
23+
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
24+
*
25+
* @file ${file_name}.h
26+
*
27+
*/
28+
29+
#include <map>
30+
#include <string>
31+
#include <vector>
32+
33+
namespace uur {
34+
namespace device_binaries {
35+
std::map<std::string, std::vector<std::string>> program_kernel_map = {
36+
% for program, entry_points in kernel_name_dict.items():
37+
{"${program}", {
38+
% for entry_point in entry_points:
39+
"${entry_point}",
40+
% endfor
41+
}},
42+
% endfor
43+
};
44+
}
45+
}
46+
""")
47+
48+
49+
def generate_header(output_file, kernel_name_dict):
50+
"""Render the template and write it to the output file."""
51+
file_name = os.path.basename(output_file)
52+
rendered = HEADER_TEMPLATE.render(file_name=file_name,
53+
kernel_name_dict=kernel_name_dict)
54+
rendered = re.sub(r"\r\n", r"\n", rendered)
55+
56+
with open(output_file, "w") as fout:
57+
fout.write(rendered)
58+
59+
60+
def get_mangled_names(dpcxx_path, source_file, output_header):
61+
"""Return a list of all the entry point names from a given sycl source file.
62+
63+
Filters out wrapper and offset handler entry points.
64+
"""
65+
output_dir = os.path.dirname(output_header)
66+
il_file = os.path.join(output_dir, os.path.basename(source_file) + ".ll")
67+
generate_il_command = f"""\
68+
{dpcxx_path} -S -fsycl -fsycl-device-code-split=off \
69+
-fsycl-device-only -o {il_file} {source_file}"""
70+
subprocess.run(generate_il_command, shell=True)
71+
kernel_line_regex = re.compile("define.*spir_kernel")
72+
definition_lines = []
73+
with open(il_file) as f:
74+
lines = f.readlines()
75+
for line in lines:
76+
if kernel_line_regex.search(line) is not None:
77+
definition_lines.append(line)
78+
79+
entry_point_names = []
80+
kernel_name_regex = re.compile(r"@(.*?)\(")
81+
for line in definition_lines:
82+
if kernel_name_regex.search(line) is None:
83+
continue
84+
kernel_name = kernel_name_regex.search(line).group(1)
85+
if "kernel_wrapper" not in kernel_name and "with_offset" not in kernel_name:
86+
entry_point_names.append(kernel_name)
87+
88+
os.remove(il_file)
89+
return entry_point_names
90+
91+
92+
def main():
93+
parser = argparse.ArgumentParser()
94+
parser.add_argument("--dpcxx_path",
95+
help="Full path to dpc++ compiler executable.")
96+
parser.add_argument(
97+
"-o",
98+
"--output",
99+
help="Full path to header file that will be generated.")
100+
parser.add_argument("source_files", nargs="+")
101+
args = parser.parse_args()
102+
103+
mangled_names = {}
104+
105+
for source_file in args.source_files:
106+
program_name = os.path.splitext(os.path.basename(source_file))[0]
107+
mangled_names[program_name] = get_mangled_names(
108+
args.dpcxx_path, source_file, args.output)
109+
generate_header(args.output, mangled_names)
110+
111+
112+
if __name__ == "__main__":
113+
sys.exit(main())

test/conformance/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ function(add_conformance_test_with_kernels_environment name)
2828
add_conformance_test(${name} ${ARGN})
2929
target_compile_definitions("test-${name}" PRIVATE KERNELS_ENVIRONMENT PRIVATE
3030
KERNELS_DEFAULT_DIR="${UR_CONFORMANCE_DEVICE_BINARIES_DIR}")
31+
target_include_directories("test-${name}" PRIVATE ${UR_CONFORMANCE_DEVICE_BINARIES_DIR})
3132
add_dependencies("test-${name}" generate_device_binaries)
3233
endfunction()
3334

@@ -53,7 +54,6 @@ add_subdirectory(usm)
5354
add_subdirectory(event)
5455
add_subdirectory(queue)
5556
add_subdirectory(sampler)
56-
add_subdirectory(enqueue)
5757

5858
if(DEFINED UR_DPCXX)
5959
add_custom_target(generate_device_binaries)
@@ -71,4 +71,5 @@ if(DEFINED UR_DPCXX)
7171
add_subdirectory(device_code)
7272
add_subdirectory(kernel)
7373
add_subdirectory(program)
74+
add_subdirectory(enqueue)
7475
endif()

test/conformance/device_code/CMakeLists.txt

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,27 +3,42 @@
33
# See LICENSE.TXT
44
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
55

6-
function(add_device_binary SOURCE_FILE)
6+
macro(add_device_binary SOURCE_FILE)
77
get_filename_component(KERNEL_NAME ${SOURCE_FILE} NAME_WE)
88
set(DEVICE_BINARY_DIR "${UR_CONFORMANCE_DEVICE_BINARIES_DIR}/${KERNEL_NAME}")
99
file(MAKE_DIRECTORY ${DEVICE_BINARY_DIR})
1010
foreach(TRIPLE ${TARGET_TRIPLES})
11-
add_custom_target(${KERNEL_NAME}_${TRIPLE}_device_binary
12-
${UR_DPCXX} -fsycl -fsycl-targets=${TRIPLE} -fsycl-device-code-split=off
13-
${SOURCE_FILE} -o "${DEVICE_BINARY_DIR}/${KERNEL_NAME}_${TRIPLE}"
11+
set(EXE_PATH "${DEVICE_BINARY_DIR}/${KERNEL_NAME}_${TRIPLE}")
12+
add_custom_command(OUTPUT ${EXE_PATH}
13+
COMMAND ${UR_DPCXX} -fsycl -fsycl-targets=${TRIPLE} -fsycl-device-code-split=off
14+
${SOURCE_FILE} -o ${EXE_PATH}
1415
COMMAND ${CMAKE_COMMAND} -E env SYCL_DUMP_IMAGES=true
15-
"${DEVICE_BINARY_DIR}/${KERNEL_NAME}_${TRIPLE}" || (exit 0)
16+
${EXE_PATH} || (exit 0)
1617
WORKING_DIRECTORY "${DEVICE_BINARY_DIR}"
17-
COMMAND ${CMAKE_COMMAND} -E remove
18-
"${DEVICE_BINARY_DIR}/${KERNEL_NAME}_${TRIPLE}"
18+
DEPENDS ${SOURCE_FILE}
1919
)
20-
add_dependencies(generate_device_binaries ${KERNEL_NAME}_${TRIPLE}_device_binary)
20+
add_custom_target(generate_${KERNEL_NAME}_${TRIPLE} DEPENDS ${EXE_PATH})
21+
add_dependencies(generate_device_binaries generate_${KERNEL_NAME}_${TRIPLE})
2122
endforeach()
22-
endfunction()
23+
list(APPEND DEVICE_CODE_SOURCES ${SOURCE_FILE})
24+
endmacro()
2325

2426
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/bar.cpp)
2527
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/fill.cpp)
28+
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/fill_2d.cpp)
29+
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/fill_3d.cpp)
30+
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/fill_usm.cpp)
2631
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/foo.cpp)
2732
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/image_copy.cpp)
2833
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/mean.cpp)
2934
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/spec_constant.cpp)
35+
36+
set(KERNEL_HEADER ${UR_CONFORMANCE_DEVICE_BINARIES_DIR}/kernel_entry_points.h)
37+
add_custom_command(OUTPUT ${KERNEL_HEADER}
38+
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/scripts
39+
COMMAND ${Python3_EXECUTABLE} generate_kernel_header.py
40+
--dpcxx_path ${UR_DPCXX} -o ${KERNEL_HEADER} ${DEVICE_CODE_SOURCES}
41+
DEPENDS ${PROJECT_SOURCE_DIR}/scripts/generate_kernel_header.py
42+
${DEVICE_CODE_SOURCES})
43+
add_custom_target(kernel_names_header DEPENDS ${KERNEL_HEADER})
44+
add_dependencies(generate_device_binaries kernel_names_header)

test/conformance/device_code/fill.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ int main() {
1515
sycl_queue.submit([&](cl::sycl::handler &cgh) {
1616
auto A_acc = A_buff.get_access<cl::sycl::access::mode::write>(cgh);
1717
cgh.parallel_for<class fill>(cl::sycl::range<1>{array_size},
18-
[=](cl::sycl::item<1> itemId) {
18+
[A_acc, val](cl::sycl::item<1> itemId) {
1919
auto id = itemId.get_id(0);
2020
A_acc[id] = val;
2121
});
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// Copyright (C) 2023 Intel Corporation
2+
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
3+
// See LICENSE.TXT
4+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5+
6+
#include <CL/sycl.hpp>
7+
8+
int main() {
9+
size_t nd_range_x = 8;
10+
size_t nd_range_y = 8;
11+
auto nd_range = cl::sycl::range<2>(nd_range_x, nd_range_y);
12+
13+
std::vector<uint32_t> A(nd_range_x * nd_range_y, 1);
14+
uint32_t val = 42;
15+
cl::sycl::queue sycl_queue;
16+
17+
auto work_range = cl::sycl::nd_range<2>(nd_range, cl::sycl::range<2>(1, 1));
18+
auto A_buff = cl::sycl::buffer<uint32_t>(
19+
A.data(), cl::sycl::range<1>(nd_range_x * nd_range_y));
20+
sycl_queue.submit([&](cl::sycl::handler &cgh) {
21+
auto A_acc = A_buff.get_access<cl::sycl::access::mode::write>(cgh);
22+
cgh.parallel_for<class fill_2d>(
23+
work_range, [A_acc, val](cl::sycl::nd_item<2> item_id) {
24+
auto id = item_id.get_global_linear_id();
25+
A_acc[id] = val;
26+
});
27+
});
28+
return 0;
29+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// Copyright (C) 2023 Intel Corporation
2+
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
3+
// See LICENSE.TXT
4+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5+
6+
#include <CL/sycl.hpp>
7+
8+
int main() {
9+
size_t nd_range_x = 4;
10+
size_t nd_range_y = 4;
11+
size_t nd_range_z = 4;
12+
auto nd_range = cl::sycl::range<3>(nd_range_x, nd_range_y, nd_range_z);
13+
14+
std::vector<uint32_t> A(nd_range_x * nd_range_y * nd_range_y, 1);
15+
uint32_t val = 42;
16+
cl::sycl::queue sycl_queue;
17+
18+
auto work_range =
19+
cl::sycl::nd_range<3>(nd_range, cl::sycl::range<3>(1, 1, 1));
20+
auto A_buff = cl::sycl::buffer<uint32_t>(
21+
A.data(), cl::sycl::range<1>(nd_range_x * nd_range_y));
22+
sycl_queue.submit([&](cl::sycl::handler &cgh) {
23+
auto A_acc = A_buff.get_access<cl::sycl::access::mode::write>(cgh);
24+
cgh.parallel_for<class fill_3d>(
25+
work_range, [A_acc, val](cl::sycl::nd_item<3> item_id) {
26+
auto id = item_id.get_global_linear_id();
27+
A_acc[id] = val;
28+
});
29+
});
30+
return 0;
31+
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
// Copyright (C) 2023 Intel Corporation
2+
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
3+
// See LICENSE.TXT
4+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5+
6+
#include <CL/sycl.hpp>
7+
8+
int main() {
9+
size_t array_size = 16;
10+
std::vector<uint32_t> A(array_size, 1);
11+
uint32_t val = 42;
12+
cl::sycl::queue sycl_queue;
13+
uint32_t *data = cl::sycl::malloc_shared<uint32_t>(array_size, sycl_queue);
14+
sycl_queue.submit([&](cl::sycl::handler &cgh) {
15+
cgh.parallel_for<class fill_usm>(cl::sycl::range<1>{array_size},
16+
[data, val](cl::sycl::item<1> itemId) {
17+
auto id = itemId.get_id(0);
18+
data[id] = val;
19+
});
20+
});
21+
return 0;
22+
}

test/conformance/enqueue/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# See LICENSE.TXT
44
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
55

6-
add_conformance_test_with_devices_environment(enqueue
6+
add_conformance_test_with_kernels_environment(enqueue
77
urEnqueueEventsWait.cpp
88
urEnqueueEventsWaitWithBarrier.cpp
99
urEnqueueKernelLaunch.cpp

0 commit comments

Comments
 (0)