Skip to content

Commit e2eaf58

Browse files
jinge90steffenlarsenAlexeySachkov
authored
[SYCL] Embed bfloat16 devicelib into executable if necessary (#16729)
Currently, the SYCL bfloat16 conversion functions are implemented in two devicelib spv files (a fallback version and a native version). The native version targets any platform that supports the "cl_intel_bfloat16_conversions" extension, and the fallback version is used for all other platforms. The SYCL runtime selects the bfloat16 spv file at execution time by checking for the bfloat16 extension. This design requires us to ship two spv files together with the SYCL runtime, which some users may dislike. This PR uses the SYCL dynamic library mechanism to re-implement this behavior. The two bfloat16 lib files are treated as dynamic libraries and embedded into the final executable, so we no longer need to ship any bfloat16 spv libs. The PR consists of the following changes: 1. Driver: pass the devicelib file location to the sycl-post-link tool. 2. sycl-post-link: analyze the user's device image to see whether bfloat16 devicelib functions are used. If so, add the two bfloat16 devicelib files as "required" dynamic libraries. All required bfloat16 devicelib functions are treated as "imported" symbols in the user's device image, and all functions in the bfloat16 devicelib are "exported" symbols. 3. SYCL runtime: load and link the required bfloat16 devicelib image and resolve the imported symbols. The fallback and native versions of the bfloat16 devicelib files have exactly the same exported functions, so we add a new metadata entry ("SYCL_DEVICELIB_BF16_TYPE") to indicate which version each file is. The SYCL runtime checks the cl_intel_bfloat16_conversions extension and this metadata to decide which version will be linked. --------- Signed-off-by: jinge90 <ge.jin@intel.com> Co-authored-by: Steffen Larsen <steffen.larsen@intel.com> Co-authored-by: Alexey Sachkov <alexey.sachkov@intel.com>
1 parent c342667 commit e2eaf58

File tree

21 files changed

+426
-43
lines changed

21 files changed

+426
-43
lines changed

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11022,6 +11022,7 @@ static void getNonTripleBasedSYCLPostLinkOpts(const ToolChain &TC,
1102211022
const JobAction &JA,
1102311023
const llvm::opt::ArgList &TCArgs,
1102411024
ArgStringList &PostLinkArgs) {
11025+
1102511026
// See if device code splitting is requested
1102611027
if (Arg *A = TCArgs.getLastArg(options::OPT_fsycl_device_code_split_EQ)) {
1102711028
auto CodeSplitValue = StringRef(A->getValue());
@@ -11047,6 +11048,25 @@ static void getNonTripleBasedSYCLPostLinkOpts(const ToolChain &TC,
1104711048

1104811049
if (allowDeviceImageDependencies(TCArgs))
1104911050
addArgs(PostLinkArgs, TCArgs, {"-allow-device-image-dependencies"});
11051+
11052+
// For bfloat16 conversions LLVM IR devicelib, we only need to embed it
11053+
// when non-AOT compilation is used.
11054+
if (TC.getTriple().isSPIROrSPIRV() && !TC.getTriple().isSPIRAOT()) {
11055+
SYCLInstallationDetector SYCLInstall(TC.getDriver());
11056+
SmallVector<SmallString<128>, 4> DeviceLibLocCandidates;
11057+
SmallString<128> NativeBfloat16Name("libsycl-native-bfloat16.bc");
11058+
SYCLInstall.getSYCLDeviceLibPath(DeviceLibLocCandidates);
11059+
for (const auto &DeviceLibLoc : DeviceLibLocCandidates) {
11060+
SmallString<128> FullLibName(DeviceLibLoc);
11061+
llvm::sys::path::append(FullLibName, NativeBfloat16Name);
11062+
if (llvm::sys::fs::exists(FullLibName)) {
11063+
SmallString<128> SYCLDeviceLibDir("--device-lib-dir=");
11064+
SYCLDeviceLibDir += DeviceLibLoc.str();
11065+
addArgs(PostLinkArgs, TCArgs, {SYCLDeviceLibDir.str()});
11066+
break;
11067+
}
11068+
}
11069+
}
1105011070
}
1105111071

1105211072
// On Intel targets we don't need non-kernel functions as entry points,
Binary file not shown.

clang/test/Driver/linker-wrapper-sycl-win.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,3 +116,17 @@
116116
// Error handling when --linker-path is not provided for clang-linker-wrapper
117117
// RUN: not clang-linker-wrapper 2>&1 | FileCheck --check-prefix=LINKER-PATH-NOT-PROVIDED %s
118118
// LINKER-PATH-NOT-PROVIDED: linker path missing, must pass 'linker-path'
119+
120+
/// Check --device-lib-dir for sycl-post-link tool
121+
// ------
122+
// Generate .o file as linker wrapper input.
123+
124+
// RUN: %clang %s -fsycl -fsycl-targets=spir64-unknown-unknown -c --offload-new-driver -o %t5.o
125+
//
126+
// Run clang-linker-wrapper test
127+
//
128+
// RUN: clang-linker-wrapper -sycl-post-link-options="SYCL_POST_LINK_OPTIONS" -llvm-spirv-options="LLVM_SPIRV_OPTIONS" "--host-triple=x86_64-pc-windows-msvc" "--linker-path=/usr/bin/ld" "--" HOST_LINKER_FLAGS "-dynamic-linker" HOST_DYN_LIB "-o" "a.out" HOST_LIB_PATH HOST_STAT_LIB %t5.o -sycl-device-libraries=libsycl-crt.new.o -sycl-device-library-location=%S/Inputs/SYCL/lib --dry-run 2>&1 | FileCheck -check-prefix=CHK-CMDS-DEVICE-LIB-DIR %s
129+
// CHK-CMDS-DEVICE-LIB-DIR: "{{.*}}spirv-to-ir-wrapper.exe" {{.*}} --llvm-spirv-opts --spirv-preserve-auxdata --spirv-target-env=SPV-IR --spirv-builtin-format=global
130+
// CHK-CMDS-DEVICE-LIB-DIR-NEXT: "{{.*}}llvm-link.exe" {{.*}} --suppress-warnings
131+
// CHK-CMDS-DEVICE-LIB-DIR-NEXT: "{{.*}}llvm-link.exe" -only-needed {{.*}} --suppress-warnings
132+
// CHK-CMDS-DEVICE-LIB-DIR-NEXT: "{{.*}}sycl-post-link.exe"{{.*}} --device-lib-dir={{.*}}/Inputs/SYCL/lib {{.*}} SYCL_POST_LINK_OPTIONS {{.*}}

clang/test/Driver/linker-wrapper-sycl.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,3 +194,15 @@
194194
// Error handling when --linker-path is not provided for clang-linker-wrapper
195195
// RUN: not clang-linker-wrapper 2>&1 | FileCheck --check-prefix=LINKER-PATH-NOT-PROVIDED %s
196196
// LINKER-PATH-NOT-PROVIDED: linker path missing, must pass 'linker-path'
197+
198+
/// Check that the --device-lib-dir option is passed to sycl-post-link.
199+
// -------
200+
// Generate .o file as linker wrapper input.
201+
//
202+
// RUN: %clang %s -fsycl -fsycl-targets=spir64-unknown-unknown -c --offload-new-driver -o %t5.o
203+
//
204+
// RUN: clang-linker-wrapper -sycl-post-link-options="SYCL_POST_LINK_OPTIONS" -llvm-spirv-options="LLVM_SPIRV_OPTIONS" "--host-triple=x86_64-unknown-linux-gnu" "--linker-path=/usr/bin/ld" "--" HOST_LINKER_FLAGS "-dynamic-linker" HOST_DYN_LIB "-o" "a.out" HOST_LIB_PATH HOST_STAT_LIB %t5.o -sycl-device-libraries=libsycl-crt.new.o -sycl-device-library-location=%S/Inputs/SYCL/lib --dry-run 2>&1 | FileCheck -check-prefix=CHK-CMDS-DEVICE-LIB-DIR %s
205+
// CHK-CMDS-DEVICE-LIB-DIR: "{{.*}}spirv-to-ir-wrapper" {{.*}} --llvm-spirv-opts --spirv-preserve-auxdata --spirv-target-env=SPV-IR --spirv-builtin-format=global
206+
// CHK-CMDS-DEVICE-LIB-DIR-NEXT: "{{.*}}llvm-link" {{.*}} --suppress-warnings
207+
// CHK-CMDS-DEVICE-LIB-DIR-NEXT: "{{.*}}llvm-link" -only-needed {{.*}} --suppress-warnings
208+
// CHK-CMDS-DEVICE-LIB-DIR-NEXT: "{{.*}}sycl-post-link"{{.*}} --device-lib-dir={{.*}}/Inputs/SYCL/lib {{.*}} SYCL_POST_LINK_OPTIONS {{.*}}

clang/test/Driver/sycl-post-link-options-win.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
// REQUIRES: system-windows
22
/// Verify same set of sycl-post-link options generated for old and new offloading model
33
// RUN: %clangxx -### --target=x86_64-pc-windows-msvc -fsycl \
4-
// RUN: -Xdevice-post-link -O0 %s 2>&1 \
4+
// RUN: -Xdevice-post-link -O0 %s --sysroot=%S/Inputs/SYCL 2>&1 \
55
// RUN: | FileCheck -check-prefix OPTIONS_POSTLINK_JIT_OLD %s
6-
// OPTIONS_POSTLINK_JIT_OLD: sycl-post-link{{.*}} "-O2" "-device-globals" "-properties" "-spec-const=native" "-split=auto" "-emit-only-kernels-as-entry-points" "-emit-param-info" "-symbols" "-emit-exported-symbols" "-emit-imported-symbols" "-split-esimd" "-lower-esimd" "-O0"
6+
// OPTIONS_POSTLINK_JIT_OLD: sycl-post-link{{.*}} "-O2" "-device-globals" "--device-lib-dir={{.*}}" "-properties" "-spec-const=native" "-split=auto" "-emit-only-kernels-as-entry-points" "-emit-param-info" "-symbols" "-emit-exported-symbols" "-emit-imported-symbols" "-split-esimd" "-lower-esimd" "-O0"
77
// -------
88
// Generate .o file as linker wrapper input.
99
//

clang/test/Driver/sycl-post-link-options.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
// REQUIRES: system-linux
22
/// Verify same set of sycl-post-link options generated for old and new offloading model
33
// RUN: %clangxx --target=x86_64-unknown-linux-gnu -fsycl -### \
4-
// RUN: --no-offload-new-driver -Xdevice-post-link -O0 %s 2>&1 \
4+
// RUN: --no-offload-new-driver -Xdevice-post-link -O0 %s --sysroot=%S/Inputs/SYCL 2>&1 \
55
// RUN: | FileCheck -check-prefix OPTIONS_POSTLINK_JIT_OLD %s
6-
// OPTIONS_POSTLINK_JIT_OLD: sycl-post-link{{.*}} "-O2" "-device-globals" "-properties" "-spec-const=native" "-split=auto" "-emit-only-kernels-as-entry-points" "-emit-param-info" "-symbols" "-emit-exported-symbols" "-emit-imported-symbols" "-split-esimd" "-lower-esimd" "-O0"
6+
// OPTIONS_POSTLINK_JIT_OLD: sycl-post-link{{.*}} "-O2" "-device-globals" "--device-lib-dir={{.*}}" "-properties" "-spec-const=native" "-split=auto" "-emit-only-kernels-as-entry-points" "-emit-param-info" "-symbols" "-emit-exported-symbols" "-emit-imported-symbols" "-split-esimd" "-lower-esimd" "-O0"
77
//
88
// Generate .o file as linker wrapper input.
99
//

clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -726,6 +726,16 @@ runSYCLPostLinkTool(ArrayRef<StringRef> InputFiles, const ArgList &Args) {
726726
SmallVector<StringRef, 8> CmdArgs;
727727
CmdArgs.push_back(*SYCLPostLinkPath);
728728
const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
729+
Arg *SYCLDeviceLibLoc = Args.getLastArg(OPT_sycl_device_library_location_EQ);
730+
if (SYCLDeviceLibLoc && !Triple.isSPIRAOT()) {
731+
std::string SYCLDeviceLibLocParam = SYCLDeviceLibLoc->getValue();
732+
std::string BF16DeviceLibLoc =
733+
SYCLDeviceLibLocParam + "/libsycl-native-bfloat16.bc";
734+
if (llvm::sys::fs::exists(BF16DeviceLibLoc)) {
735+
SYCLDeviceLibLocParam = "--device-lib-dir=" + SYCLDeviceLibLocParam;
736+
CmdArgs.push_back(Args.MakeArgString(StringRef(SYCLDeviceLibLocParam)));
737+
}
738+
}
729739
getTripleBasedSYCLPostLinkOpts(Args, CmdArgs, Triple);
730740
StringRef SYCLPostLinkOptions;
731741
if (Arg *A = Args.getLastArg(OPT_sycl_post_link_options_EQ))

llvm/include/llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
#include "llvm/ADT/SetVector.h"
1414
#include "llvm/SYCLLowerIR/ModuleSplitter.h"
15+
#include "llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h"
1516
#include "llvm/Support/PropertySetIO.h"
1617
#include <string>
1718
namespace llvm {
@@ -34,6 +35,9 @@ bool isModuleUsingTsan(const Module &M);
3435
using PropSetRegTy = llvm::util::PropertySetRegistry;
3536
using EntryPointSet = SetVector<Function *>;
3637

38+
PropSetRegTy computeDeviceLibProperties(const Module &M,
39+
const std::string &SYCLDeviceLibName);
40+
3741
PropSetRegTy computeModuleProperties(const Module &M,
3842
const EntryPointSet &EntryPoints,
3943
const GlobalBinImageProps &GlobProps);

llvm/include/llvm/SYCLLowerIR/ModuleSplitter.h

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "llvm/Support/Error.h"
2222
#include "llvm/Support/PropertySetIO.h"
2323

24+
#include <array>
2425
#include <memory>
2526
#include <optional>
2627
#include <string>
@@ -38,6 +39,8 @@ class OptionCategory;
3839
namespace module_split {
3940

4041
constexpr char SYCL_ESIMD_SPLIT_MD_NAME[] = "sycl-esimd-split-status";
42+
constexpr std::array<const char *, 2> SYCLDeviceLibs = {
43+
"libsycl-fallback-bfloat16.bc", "libsycl-native-bfloat16.bc"};
4144

4245
extern cl::OptionCategory &getModuleSplitCategory();
4346

@@ -129,6 +132,7 @@ class ModuleDesc {
129132
std::unique_ptr<Module> M;
130133
EntryPointGroup EntryPoints;
131134
bool IsTopLevel = false;
135+
bool IsSYCLDeviceLib = false;
132136
mutable std::optional<SYCLDeviceRequirements> Reqs;
133137

134138
public:
@@ -140,7 +144,16 @@ class ModuleDesc {
140144
Properties Props;
141145

142146
ModuleDesc(std::unique_ptr<Module> &&M, StringRef Name = "TOP-LEVEL")
143-
: M(std::move(M)), IsTopLevel(true), Name(Name) {}
147+
: M(std::move(M)), IsTopLevel(true), Name(Name) {
148+
// DeviceLib module doesn't include any entry point, so it can be constructed
149+
// using ctor without any entry point related parameter.
150+
for (auto Fn : SYCLDeviceLibs) {
151+
if (StringRef(Fn) == Name) {
152+
IsSYCLDeviceLib = true;
153+
break;
154+
}
155+
}
156+
}
144157

145158
ModuleDesc(std::unique_ptr<Module> &&M, EntryPointGroup &&EntryPoints,
146159
const Properties &Props)
@@ -166,6 +179,7 @@ class ModuleDesc {
166179

167180
bool isESIMD() const { return EntryPoints.isEsimd(); }
168181
bool isSYCL() const { return EntryPoints.isSycl(); }
182+
bool isSYCLDeviceLib() const { return IsSYCLDeviceLib; }
169183

170184
const EntryPointSet &entries() const { return EntryPoints.Functions; }
171185
const EntryPointGroup &getEntryPointGroup() const { return EntryPoints; }

llvm/include/llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,14 @@
2020

2121
namespace llvm {
2222

23+
class Function;
2324
class Module;
2425

2526
// DeviceLibExt is shared between sycl-post-link tool and sycl runtime.
2627
// If any change is made here, need to sync with DeviceLibExt definition
2728
// in sycl/source/detail/program_manager/program_manager.hpp
29+
// TODO: clear all these DeviceLibExt defs when we begin to remove the sycl
30+
// devicelib online link path.
2831
enum class DeviceLibExt : std::uint32_t {
2932
cl_intel_devicelib_assert,
3033
cl_intel_devicelib_math,
@@ -39,5 +42,6 @@ enum class DeviceLibExt : std::uint32_t {
3942
};
4043

4144
uint32_t getSYCLDeviceLibReqMask(const Module &M);
42-
45+
bool isSYCLDeviceLibBF16Used(const Module &M);
46+
bool isBF16DeviceLibFuncDecl(const Function &F);
4347
} // namespace llvm

0 commit comments

Comments
 (0)