Skip to content

[Clang] Extract offloading code from static libs with 'offload-arch=' #147823

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9179,7 +9179,9 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
// specific architecture via -Xarch_<cpu> will not be forwarded.
ArgStringList CompilerArgs;
ArgStringList LinkerArgs;
for (Arg *A : C.getArgsForToolChain(TC, /*BoundArch=*/"", Kind)) {
const DerivedArgList &ToolChainArgs =
C.getArgsForToolChain(TC, /*BoundArch=*/"", Kind);
for (Arg *A : ToolChainArgs) {
if (A->getOption().matches(OPT_Zlinker_input))
LinkerArgs.emplace_back(A->getValue());
else if (ShouldForward(CompilerOptions, A))
Expand All @@ -9188,6 +9190,11 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
A->render(Args, LinkerArgs);
}

// If the user explicitly requested an architecture via `--offload-arch`, we
// should extract device code for it from any static libraries, if present.
for (StringRef Arg : ToolChainArgs.getAllArgValues(OPT_offload_arch_EQ))
CmdArgs.emplace_back(Args.MakeArgString("--should-extract=" + Arg));

// If this is OpenMP the device linker will need `-lompdevice`.
if (Kind == Action::OFK_OpenMP && !Args.hasArg(OPT_no_offloadlib) &&
(TC->getTriple().isAMDGPU() || TC->getTriple().isNVPTX()))
Expand Down
9 changes: 9 additions & 0 deletions clang/test/Driver/openmp-offload-gpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -395,3 +395,12 @@
// RUN: --offload-arch=sm_52 -foffload-lto=thin -nogpulib -nogpuinc %s 2>&1 \
// RUN: | FileCheck --check-prefix=THINLTO-SM52 %s
// THINLTO-SM52: --device-compiler=nvptx64-nvidia-cuda=-flto=thin

//
// Check the requested architecture is passed if provided.
//
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \
// RUN: --offload-arch=gfx906 -foffload-lto=thin -nogpulib -nogpuinc %s 2>&1 \
// RUN: | FileCheck --check-prefix=SHOULD-EXTRACT %s
//
// SHOULD-EXTRACT: clang-linker-wrapper{{.*}}"--should-extract=gfx906"
12 changes: 9 additions & 3 deletions clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1302,6 +1302,11 @@ getDeviceInput(const ArgList &Args) {
// after every regular input file so that libraries may be included out of
// order. This follows 'ld.lld' semantics which are more lenient.
bool Extracted = true;
// Architectures named by `--should-extract=` are extracted from archives even
// when no input object targets them. The set stores non-owning StringRefs, so
// it must be filled from storage that outlives it: getAllArgValues() returns a
// temporary vector of strings that is destroyed when the loop ends, which
// would leave the set dangling. The value pointers held by each Arg stay valid
// for as long as `Args` does.
llvm::DenseSet<StringRef> ShouldExtract;
for (const auto *A : Args.filtered(OPT_should_extract))
  for (const char *Value : A->getValues())
    ShouldExtract.insert(Value);
while (Extracted) {
Extracted = false;
for (OffloadFile &Binary : ArchiveFilesToExtract) {
Expand All @@ -1315,8 +1320,9 @@ getDeviceInput(const ArgList &Args) {
CompatibleTargets.emplace_back(ID);

for (const auto &[Index, ID] : llvm::enumerate(CompatibleTargets)) {
// Only extract an if we have an an object matching this target.
if (!InputFiles.count(ID))
// Only extract if we have an object matching this target or the
// architecture was specifically requested.
if (!InputFiles.count(ID) && !ShouldExtract.contains(ID.second))
continue;

Expected<bool> ExtractOrErr =
Expand All @@ -1330,7 +1336,7 @@ getDeviceInput(const ArgList &Args) {

// Skip including the file if it is an archive that does not resolve
// any symbols.
if (!Extracted)
if (!Extracted && !ShouldExtract.contains(ID.second))
continue;

// If another target needs this binary it must be copied instead.
Expand Down
4 changes: 4 additions & 0 deletions clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ def override_image : Joined<["--"], "override-image=">,
Flags<[WrapperOnlyOption]>, MetaVarName<"<kind=file>">,
HelpText<"Uses the provided file as if it were the output of the device link step">;

// Comma-separated list of device architectures whose code is extracted from
// static libraries even when no input object targets them.
def should_extract : CommaJoined<["--"], "should-extract=">,
  Flags<[WrapperOnlyOption]>, MetaVarName<"<arch>">,
  HelpText<"Set of device architectures we should always extract if found.">;

// Flags passed to the device linker.
def arch_EQ : Joined<["--"], "arch=">,
Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"<arch>">,
Expand Down
Loading