Skip to content

Commit b19ac93

Browse files
committed
Improve CTS device code generation
Some improvements to the device code generation CMake:

* Use `clang-offload-extract` instead of running the binary directly.
* Set up dependencies correctly so that re-running a build regenerates any IR files that failed to build previously.
* Dynamically generate the list of device_code files: from now on, any .cpp file in the device_code directory is automatically treated as device code and has IR files generated for it.

This change drops CTS support for platforms that support multiple devices with different IR types; however, the build system didn't support that anyway, nor do any of our adapters.

This change requires a `clang-offload-extract` binary to be available. By default it is assumed to be in the same directory as the provided DPC++ binary (which should work for most builds); however, the CMake variable `UR_DEVICE_CODE_EXTRACTOR` is provided to override this.
1 parent f33b941 commit b19ac93

File tree

7 files changed

+39
-56
lines changed

7 files changed

+39
-56
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ List of options provided by CMake:
141141
| UR_HIP_PLATFORM | Build HIP adapter for AMD or NVIDIA platform | AMD/NVIDIA | AMD |
142142
| UR_ENABLE_COMGR | Enable comgr lib usage | AMD/NVIDIA | AMD |
143143
| UR_DPCXX | Path of the DPC++ compiler executable to build CTS device binaries | File path | `""` |
144+
| UR_DEVICE_CODE_EXTRACTOR | Path of the `clang-offload-extract` executable from the DPC++ package, required for CTS device binaries | File path | `"${dirname(UR_DPCXX)}/clang-offload-extract"` |
144145
| UR_DPCXX_BUILD_FLAGS | Build flags to pass to DPC++ when compiling device programs | Space-separated options list | `""` |
145146
| UR_SYCL_LIBRARY_DIR | Path of the SYCL runtime library directory to build CTS device binaries | Directory path | `""` |
146147
| UR_HIP_ROCM_DIR | Path of the default ROCm HIP installation | Directory path | `/opt/rocm` |

test/conformance/device_code/CMakeLists.txt

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@ else()
1010
set(AMD_ARCH "${UR_CONFORMANCE_AMD_ARCH}")
1111
endif()
1212

13+
cmake_path(GET UR_DPCXX EXTENSION EXE)
14+
cmake_path(REPLACE_FILENAME UR_DPCXX "clang-offload-extract${EXE}" OUTPUT_VARIABLE DEFAULT_EXTRACTOR_NAME)
15+
set(UR_DEVICE_CODE_EXTRACTOR "${DEFAULT_EXTRACTOR_NAME}" CACHE PATH "Path to clang-offload-extract")
16+
1317
if("${AMD_ARCH}" STREQUAL "" AND "${TARGET_TRIPLES}" MATCHES "amd")
1418
find_package(RocmAgentEnumerator)
1519
if(NOT ROCM_AGENT_ENUMERATOR_FOUND)
@@ -59,6 +63,8 @@ macro(add_device_binary SOURCE_FILE)
5963

6064
foreach(TRIPLE ${TARGET_TRIPLES})
6165
set(EXE_PATH "${DEVICE_BINARY_DIR}/${KERNEL_NAME}_${TRIPLE}")
66+
set(BIN_PATH "${DEVICE_BINARY_DIR}/${TRIPLE}.bin.0")
67+
6268
if(${TRIPLE} MATCHES "amd")
6369
set(AMD_TARGET_BACKEND -Xsycl-target-backend=${TRIPLE})
6470
set(AMD_OFFLOAD_ARCH --offload-arch=${AMD_ARCH})
@@ -81,17 +87,17 @@ macro(add_device_binary SOURCE_FILE)
8187
continue()
8288
endif()
8389

84-
add_custom_command(OUTPUT ${EXE_PATH}
90+
add_custom_command(OUTPUT "${BIN_PATH}"
8591
COMMAND ${UR_DPCXX} -fsycl -fsycl-targets=${TRIPLE} -fsycl-device-code-split=off
8692
${AMD_TARGET_BACKEND} ${AMD_OFFLOAD_ARCH} ${AMD_NOGPULIB}
8793
${DPCXX_BUILD_FLAGS_LIST} ${SOURCE_FILE} -o ${EXE_PATH}
8894

89-
COMMAND ${CMAKE_COMMAND} -E env ${EXTRA_ENV} SYCL_DUMP_IMAGES=true
90-
${EXE_PATH} || exit 0
95+
COMMAND ${CMAKE_COMMAND} -E env ${EXTRA_ENV} ${UR_DEVICE_CODE_EXTRACTOR} --stem="${TRIPLE}.bin" ${EXE_PATH}
96+
9197
WORKING_DIRECTORY "${DEVICE_BINARY_DIR}"
9298
DEPENDS ${SOURCE_FILE}
9399
)
94-
add_custom_target(generate_${KERNEL_NAME}_${TRIPLE} DEPENDS ${EXE_PATH})
100+
add_custom_target(generate_${KERNEL_NAME}_${TRIPLE} DEPENDS ${BIN_PATH})
95101
add_dependencies(generate_device_binaries generate_${KERNEL_NAME}_${TRIPLE})
96102
endforeach()
97103
list(APPEND DEVICE_CODE_SOURCES ${SOURCE_FILE})

test/conformance/program/urProgramBuild.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,7 @@ TEST_P(urProgramBuildTest, InvalidNullHandleProgram) {
3030
TEST_P(urProgramBuildTest, BuildFailure) {
3131
ur_program_handle_t program = nullptr;
3232
std::shared_ptr<std::vector<char>> il_binary;
33-
uur::KernelsEnvironment::instance->LoadSource("build_failure", 0,
34-
il_binary);
33+
uur::KernelsEnvironment::instance->LoadSource("build_failure", il_binary);
3534
if (!il_binary) {
3635
// The build failure we are testing for happens at SYCL compile time on
3736
// AMD and Nvidia, so no binary exists to check for a build failure

test/conformance/program/urProgramCreateWithIL.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ struct urProgramCreateWithILTest : uur::urContextTest {
1717
if (backend == UR_PLATFORM_BACKEND_HIP) {
1818
GTEST_SKIP();
1919
}
20-
uur::KernelsEnvironment::instance->LoadSource("foo", 0, il_binary);
20+
uur::KernelsEnvironment::instance->LoadSource("foo", il_binary);
2121
}
2222

2323
void TearDown() override {

test/conformance/source/environment.cpp

Lines changed: 20 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,7 @@ KernelsEnvironment::parseKernelOptions(int argc, char **argv,
367367
return options;
368368
}
369369

370-
std::string KernelsEnvironment::getSupportedILPostfix(uint32_t device_index) {
370+
std::string KernelsEnvironment::getTargetName() {
371371
std::stringstream IL;
372372

373373
if (instance->GetDevices().size() == 0) {
@@ -382,66 +382,44 @@ std::string KernelsEnvironment::getSupportedILPostfix(uint32_t device_index) {
382382
error = "failed to get backend from platform.";
383383
return {};
384384
}
385-
if (backend == UR_PLATFORM_BACKEND_HIP) {
386-
return ".bin";
387-
}
388385

389-
auto device = instance->GetDevices()[device_index];
390-
std::string IL_version;
391-
if (uur::GetDeviceILVersion(device, IL_version)) {
392-
error = "failed to get device IL version";
386+
std::string target = "";
387+
switch (backend) {
388+
case UR_PLATFORM_BACKEND_OPENCL:
389+
case UR_PLATFORM_BACKEND_LEVEL_ZERO:
390+
return "spir64";
391+
case UR_PLATFORM_BACKEND_CUDA:
392+
return "nvptx64-nvidia-cuda";
393+
case UR_PLATFORM_BACKEND_HIP:
394+
return "amdgcn-amd-amdhsa";
395+
case UR_PLATFORM_BACKEND_NATIVE_CPU:
396+
error = "native_cpu doesn't support kernel tests yet";
393397
return {};
394-
}
395-
396-
// TODO: This potentially needs updating as more adapters are tested.
397-
if (IL_version.find("SPIR-V") != std::string::npos) {
398-
IL << ".spv";
399-
} else if (IL_version.find("nvptx") != std::string::npos) {
400-
IL << ".bin";
401-
} else {
402-
error = "Undefined IL version: " + IL_version;
398+
default:
399+
error = "unknown target.";
403400
return {};
404401
}
405-
406-
return IL.str();
407402
}
408403

409404
std::string
410-
KernelsEnvironment::getKernelSourcePath(const std::string &kernel_name,
411-
uint32_t device_index) {
405+
KernelsEnvironment::getKernelSourcePath(const std::string &kernel_name) {
412406
std::stringstream path;
413407
path << kernel_options.kernel_directory << "/" << kernel_name;
414-
std::string il_postfix = getSupportedILPostfix(device_index);
415-
416-
if (il_postfix.empty()) {
417-
return {};
418-
}
419-
420-
std::string binary_name;
421-
for (const auto &entry : filesystem::directory_iterator(path.str())) {
422-
auto file_name = entry.path().filename().string();
423-
if (file_name.find(il_postfix) != std::string::npos) {
424-
binary_name = file_name;
425-
break;
426-
}
427-
}
428408

429-
if (binary_name.empty()) {
430-
error =
431-
"failed retrieving kernel source path for kernel: " + kernel_name;
409+
std::string target_name = getTargetName();
410+
if (target_name.empty()) {
432411
return {};
433412
}
434413

435-
path << "/" << binary_name;
414+
path << "/" << target_name << ".bin.0";
436415

437416
return path.str();
438417
}
439418

440419
void KernelsEnvironment::LoadSource(
441-
const std::string &kernel_name, uint32_t device_index,
420+
const std::string &kernel_name,
442421
std::shared_ptr<std::vector<char>> &binary_out) {
443-
std::string source_path =
444-
instance->getKernelSourcePath(kernel_name, device_index);
422+
std::string source_path = instance->getKernelSourcePath(kernel_name);
445423

446424
if (source_path.empty()) {
447425
FAIL() << error;

test/conformance/testing/include/uur/environment.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ struct KernelsEnvironment : DevicesEnvironment {
7272
virtual void SetUp() override;
7373
virtual void TearDown() override;
7474

75-
void LoadSource(const std::string &kernel_name, uint32_t device_index,
75+
void LoadSource(const std::string &kernel_name,
7676
std::shared_ptr<std::vector<char>> &binary_out);
7777

7878
ur_result_t CreateProgram(ur_platform_handle_t hPlatform,
@@ -89,9 +89,8 @@ struct KernelsEnvironment : DevicesEnvironment {
8989
private:
9090
KernelOptions parseKernelOptions(int argc, char **argv,
9191
const std::string &kernels_default_dir);
92-
std::string getKernelSourcePath(const std::string &kernel_name,
93-
uint32_t device_index);
94-
std::string getSupportedILPostfix(uint32_t device_index);
92+
std::string getKernelSourcePath(const std::string &kernel_name);
93+
std::string getTargetName();
9594

9695
KernelOptions kernel_options;
9796
// mapping between kernels (full_path + kernel_name) and their saved source.

test/conformance/testing/include/uur/fixtures.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -350,7 +350,7 @@ struct urHostPipeTest : urQueueTest {
350350
void SetUp() override {
351351
UUR_RETURN_ON_FATAL_FAILURE(urQueueTest::SetUp());
352352
UUR_RETURN_ON_FATAL_FAILURE(
353-
uur::KernelsEnvironment::instance->LoadSource("foo", 0, il_binary));
353+
uur::KernelsEnvironment::instance->LoadSource("foo", il_binary));
354354
ASSERT_SUCCESS(uur::KernelsEnvironment::instance->CreateProgram(
355355
platform, context, device, *il_binary, nullptr, &program));
356356

@@ -1135,7 +1135,7 @@ struct urProgramTest : urQueueTest {
11351135
GTEST_SKIP();
11361136
}
11371137
UUR_RETURN_ON_FATAL_FAILURE(
1138-
uur::KernelsEnvironment::instance->LoadSource(program_name, 0,
1138+
uur::KernelsEnvironment::instance->LoadSource(program_name,
11391139
il_binary));
11401140

11411141
const ur_program_properties_t properties = {
@@ -1174,7 +1174,7 @@ template <class T> struct urProgramTestWithParam : urQueueTestWithParam<T> {
11741174
}
11751175

11761176
UUR_RETURN_ON_FATAL_FAILURE(
1177-
uur::KernelsEnvironment::instance->LoadSource(program_name, 0,
1177+
uur::KernelsEnvironment::instance->LoadSource(program_name,
11781178
il_binary));
11791179
ASSERT_SUCCESS(uur::KernelsEnvironment::instance->CreateProgram(
11801180
this->platform, this->context, this->device, *il_binary, nullptr,

0 commit comments

Comments
 (0)