Skip to content

Commit b42de17

Browse files
[SYCL] Relax linking workaround and test SYCL interlinking (#19171)
This commit relaxes a workaround for linking inline SYCL code with SYCLBIN. That is, instead of separating all offline images into separate linking, it instead separates all binaries with specialization constants into their own linking runs. Additionally, it relaxes the requirement for dependency resolution when device images are looked up for anything other than executable state, avoiding errors when the symbol resolution is done manually. --------- Signed-off-by: Larsen, Steffen <steffen.larsen@intel.com>
1 parent 158b9bb commit b42de17

File tree

8 files changed

+189
-15
lines changed

8 files changed

+189
-15
lines changed

sycl/source/detail/kernel_bundle_impl.hpp

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -248,21 +248,26 @@ class kernel_bundle_impl
248248

249249
// Due to a bug in L0, specializations with conflicting IDs will overwrite
250250
// each other when linked together, so to avoid this issue we link
251-
// regular offline-compiled SYCL device images in separation.
251+
// images with specialization constants in separation.
252252
// TODO: Remove when spec const overwriting issue has been fixed in L0.
253-
std::vector<const DevImgPlainWithDeps *> OfflineDeviceImages;
253+
std::vector<const DevImgPlainWithDeps *> ImagesWithSpecConsts;
254254
std::unordered_set<std::shared_ptr<device_image_impl>>
255-
OfflineDeviceImageSet;
255+
ImagesWithSpecConstsSet;
256256
for (const kernel_bundle<bundle_state::object> &ObjectBundle :
257257
ObjectBundles) {
258258
for (const DevImgPlainWithDeps &DeviceImageWithDeps :
259259
getSyclObjImpl(ObjectBundle)->MDeviceImages) {
260-
if (getSyclObjImpl(DeviceImageWithDeps.getMain())->getOriginMask() &
261-
ImageOriginSYCLOffline) {
262-
OfflineDeviceImages.push_back(&DeviceImageWithDeps);
263-
for (const device_image_plain &DevImg : DeviceImageWithDeps)
264-
OfflineDeviceImageSet.insert(getSyclObjImpl(DevImg));
265-
}
260+
if (std::none_of(DeviceImageWithDeps.begin(), DeviceImageWithDeps.end(),
261+
[](const device_image_plain &DevImg) {
262+
const RTDeviceBinaryImage *BinImg =
263+
getSyclObjImpl(DevImg)->get_bin_image_ref();
264+
return BinImg && BinImg->getSpecConstants().size();
265+
}))
266+
continue;
267+
268+
ImagesWithSpecConsts.push_back(&DeviceImageWithDeps);
269+
for (const device_image_plain &DevImg : DeviceImageWithDeps)
270+
ImagesWithSpecConstsSet.insert(getSyclObjImpl(DevImg));
266271
}
267272
}
268273

@@ -274,8 +279,8 @@ class kernel_bundle_impl
274279
ObjectBundles)
275280
for (const device_image_plain &DevImg :
276281
getSyclObjImpl(ObjectBundle)->MUniqueDeviceImages)
277-
if (OfflineDeviceImageSet.find(getSyclObjImpl(DevImg)) ==
278-
OfflineDeviceImageSet.end())
282+
if (ImagesWithSpecConstsSet.find(getSyclObjImpl(DevImg)) ==
283+
ImagesWithSpecConstsSet.end())
279284
DevImagesSet.insert(getSyclObjImpl(DevImg));
280285
DevImages.reserve(DevImagesSet.size());
281286
for (auto It = DevImagesSet.begin(); It != DevImagesSet.end();)
@@ -391,7 +396,8 @@ class kernel_bundle_impl
391396
}
392397

393398
// ... And link the offline images in separation. (Workaround.)
394-
for (const DevImgPlainWithDeps *DeviceImageWithDeps : OfflineDeviceImages) {
399+
for (const DevImgPlainWithDeps *DeviceImageWithDeps :
400+
ImagesWithSpecConsts) {
395401
// Skip images which are not compatible with devices provided
396402
if (std::none_of(MDevices.begin(), MDevices.end(),
397403
[DeviceImageWithDeps](const device &Dev) {

sycl/source/detail/program_manager/program_manager.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2598,7 +2598,10 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState(
25982598
std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
25992599
ImgInfo.KernelIDs = m_BinImg2KernelIDs[BinImage];
26002600
}
2601-
ImgInfo.Deps = collectDeviceImageDeps(*BinImage, {DevImpl});
2601+
ImgInfo.Deps =
2602+
collectDeviceImageDeps(*BinImage, {DevImpl},
2603+
/*ErrorOnUnresolvableImport=*/TargetState ==
2604+
bundle_state::executable);
26022605
}
26032606
const bundle_state ImgState = ImgInfo.State;
26042607
const std::shared_ptr<std::vector<sycl::kernel_id>> &ImageKernelIDs =
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#include <sycl/sycl.hpp>
2+
3+
namespace syclext = sycl::ext::oneapi;
4+
namespace syclexp = sycl::ext::oneapi::experimental;
5+
6+
typedef void (*FuncPtrT)(size_t *);
7+
8+
struct ArgsT {
9+
size_t *Ptr;
10+
FuncPtrT *FuncPtr;
11+
};
12+
13+
SYCL_EXTERNAL size_t GetID() {
14+
return syclext::this_work_item::get_nd_item<1>().get_global_id();
15+
}
16+
17+
extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(
18+
(syclexp::nd_range_kernel<1>)) void Kernel(ArgsT Args) {
19+
(**Args.FuncPtr)(Args.Ptr);
20+
}
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
#include "common.hpp"
2+
3+
#include <sycl/usm.hpp>
4+
5+
namespace syclexp = sycl::ext::oneapi::experimental;
6+
7+
typedef void (*FuncPtrT)(size_t *);
8+
9+
struct ArgsT {
10+
size_t *Ptr;
11+
FuncPtrT *FuncPtr;
12+
};
13+
14+
#ifdef __SYCL_DEVICE_ONLY__
15+
SYCL_EXTERNAL size_t GetID();
16+
#else
17+
// Host-side code to avoid linker problems. Will never be called.
18+
SYCL_EXTERNAL size_t GetID() { return 0; }
19+
#endif
20+
21+
SYCL_EXTERNAL
22+
void Func(size_t *Ptr) {
23+
size_t GlobalID = GetID();
24+
Ptr[GlobalID] = GlobalID;
25+
}
26+
27+
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::single_task_kernel))
28+
void GetFuncPtr(ArgsT Args) { *Args.FuncPtr = Func; }
29+
30+
constexpr size_t N = 32;
31+
32+
int main(int argc, char *argv[]) {
33+
assert(argc == 2);
34+
35+
sycl::queue Q;
36+
37+
int Failed = CommonLoadCheck(Q.get_context(), argv[1]);
38+
39+
#if defined(SYCLBIN_INPUT_STATE)
40+
auto SYCLBINInput = syclexp::get_kernel_bundle<sycl::bundle_state::input>(
41+
Q.get_context(), std::string{argv[1]});
42+
auto SYCLBINObj = sycl::compile(SYCLBINInput);
43+
#elif defined(SYCLBIN_OBJECT_STATE)
44+
auto SYCLBINObj = syclexp::get_kernel_bundle<sycl::bundle_state::object>(
45+
Q.get_context(), std::string{argv[1]});
46+
#else // defined(SYCLBIN_EXECUTABLE_STATE)
47+
#error "Test does not work with executable state."
48+
#endif
49+
50+
auto KBObj =
51+
syclexp::get_kernel_bundle<GetFuncPtr, sycl::bundle_state::object>(
52+
Q.get_context());
53+
auto KBExe = sycl::link({KBObj, SYCLBINObj});
54+
55+
ArgsT Args{};
56+
Args.FuncPtr = sycl::malloc_shared<FuncPtrT>(N, Q);
57+
Args.Ptr = sycl::malloc_shared<size_t>(N, Q);
58+
59+
sycl::kernel GetFuncPtrKern = KBExe.ext_oneapi_get_kernel<GetFuncPtr>();
60+
Q.submit([&](sycl::handler &CGH) {
61+
CGH.set_args(Args);
62+
CGH.single_task(GetFuncPtrKern);
63+
}).wait();
64+
65+
sycl::kernel Kern = KBExe.ext_oneapi_get_kernel("Kernel");
66+
Q.submit([&](sycl::handler &CGH) {
67+
CGH.set_args(Args);
68+
CGH.parallel_for(sycl::nd_range{{N}, {N}}, Kern);
69+
}).wait();
70+
71+
for (size_t I = 0; I < N; ++I) {
72+
if (Args.Ptr[I] != I) {
73+
std::cout << Args.Ptr[I] << " != " << I << std::endl;
74+
++Failed;
75+
}
76+
}
77+
78+
sycl::free(Args.FuncPtr, Q);
79+
sycl::free(Args.Ptr, Q);
80+
81+
return Failed;
82+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
//==-------- link_sycl_inline_input.cpp --- SYCLBIN extension tests --------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// REQUIRES: aspect-usm_shared_allocations
10+
11+
// -- Test for linking between inline SYCL code and SYCLBIN code.
12+
13+
// ptxas currently fails to compile images with unresolved symbols. Disable for
14+
// other targets than SPIR-V until this has been resolved. (CMPLRLLVM-68810)
15+
// Note: %{sycl_target_opts} should be added to the SYCLBIN compilation lines
16+
// once fixed.
17+
// REQUIRES: target-spir
18+
19+
// XFAIL: opencl && cpu
20+
// XFAIL-TRACKER: CMPLRLLVM-68800
21+
22+
// XFAIL: linux && arch-intel_gpu_bmg_g21
23+
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/19258
24+
25+
// RUN: %clangxx --offload-new-driver -fsyclbin=input -fsycl-allow-device-image-dependencies -Xclang -fsycl-allow-func-ptr %S/Inputs/link_sycl_inline.cpp -o %t.syclbin
26+
// RUN: %{build} -fsycl-allow-device-image-dependencies -o %t.out
27+
// RUN: %{l0_leak_check} %{run} %t.out %t.syclbin
28+
29+
#define SYCLBIN_INPUT_STATE
30+
31+
#include "Inputs/link_sycl_inline.hpp"
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
//==-------- link_sycl_inline_object.cpp --- SYCLBIN extension tests
2+
//--------==//
3+
//
4+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5+
// See https://llvm.org/LICENSE.txt for license information.
6+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
//
8+
//===----------------------------------------------------------------------===//
9+
10+
// REQUIRES: aspect-usm_shared_allocations
11+
12+
// -- Test for linking between inline SYCL code and SYCLBIN code.
13+
14+
// ptxas currently fails to compile images with unresolved symbols. Disable for
15+
// other targets than SPIR-V until this has been resolved. (CMPLRLLVM-68810)
16+
// Note: %{sycl_target_opts} should be added to the SYCLBIN compilation lines
17+
// once fixed.
18+
// REQUIRES: target-spir
19+
20+
// XFAIL: opencl && cpu
21+
// XFAIL-TRACKER: CMPLRLLVM-68800
22+
23+
// XFAIL: linux && arch-intel_gpu_bmg_g21
24+
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/19258
25+
26+
// RUN: %clangxx --offload-new-driver -fsyclbin=object -fsycl-allow-device-image-dependencies -Xclang -fsycl-allow-func-ptr %S/Inputs/link_sycl_inline.cpp -o %t.syclbin
27+
// RUN: %{build} -fsycl-allow-device-image-dependencies -o %t.out
28+
// RUN: %{l0_leak_check} %{run} %t.out %t.syclbin
29+
30+
#define SYCLBIN_OBJECT_STATE
31+
32+
#include "Inputs/link_sycl_inline.hpp"

sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
// CHECK-DAG: README.md
77
// CHECK-DAG: lit.cfg.py
88
//
9-
// CHECK-NUM-MATCHES: 28
9+
// CHECK-NUM-MATCHES: 29
1010
//
1111
// This test verifies that `<sycl/sycl.hpp>` isn't used in E2E tests. Instead,
1212
// fine-grained includes should used, see

sycl/unittests/kernel-and-program/OutOfResources.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ TEST_P(OutOfResourcesTestSuite, urProgramLink) {
173173
auto b3 = sycl::link({b1, b2});
174174
EXPECT_FALSE(outOfResourcesToggle);
175175
// one restart due to out of resources, one link per each of b1 and b2.
176-
EXPECT_EQ(nProgramLink, 3);
176+
EXPECT_EQ(nProgramLink, 2);
177177
// no programs should be in the cache due to out of resources.
178178
{
179179
detail::KernelProgramCache::ProgramCache &Cache =

0 commit comments

Comments
 (0)