Skip to content

Commit e76ad72

Browse files
authored
[SYCL][CUDA] Compile .cu files with SYCL (#6395)
This PR allows to compile `.cu` files with SYCL, i.e., 1. Compile `.cu` sources in order to generate `.o` files for SYCL. 2. Compile `.cu` and `.cpp` files for obtaining a SYCL executable. 3. Create a dummy (CUDA) `__host__` function for each (CUDA) `__device__` function that hasn't one.
1 parent 531e18b commit e76ad72

File tree

6 files changed

+254
-52
lines changed

6 files changed

+254
-52
lines changed

clang/lib/CodeGen/CodeGenFunction.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1626,6 +1626,18 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
16261626
if (Body && isa_and_nonnull<CoroutineBodyStmt>(Body))
16271627
llvm::append_range(FnArgs, FD->parameters());
16281628

1629+
// Generate a dummy __host__ function for compiling CUDA sources in SYCL.
1630+
if (getLangOpts().CUDA && !getLangOpts().CUDAIsDevice &&
1631+
getLangOpts().SYCLIsHost && !FD->hasAttr<CUDAHostAttr>() &&
1632+
FD->hasAttr<CUDADeviceAttr>()) {
1633+
Fn->setLinkage(llvm::Function::WeakODRLinkage);
1634+
if (FD->getReturnType()->isVoidType())
1635+
Builder.CreateRetVoid();
1636+
else
1637+
Builder.CreateRet(llvm::UndefValue::get(Fn->getReturnType()));
1638+
return;
1639+
}
1640+
16291641
// Generate the body of the function.
16301642
PGO.assignRegionCounters(GD, CurFn);
16311643
if (isa<CXXDestructorDecl>(FD))

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2740,6 +2740,16 @@ void CodeGenModule::EmitDeferred() {
27402740
CurDeclsToEmit.swap(DeferredDeclsToEmit);
27412741

27422742
for (GlobalDecl &D : CurDeclsToEmit) {
2743+
// Emit a dummy __host__ function if a legit one is not already present in
2744+
// case of SYCL compilation of CUDA sources.
2745+
if (LangOpts.CUDA && !LangOpts.CUDAIsDevice && LangOpts.SYCLIsHost) {
2746+
GlobalDecl OtherD;
2747+
if (lookupRepresentativeDecl(getMangledName(D), OtherD) &&
2748+
(D.getCanonicalDecl().getDecl() !=
2749+
OtherD.getCanonicalDecl().getDecl())) {
2750+
continue;
2751+
}
2752+
}
27432753
const ValueDecl *VD = cast<ValueDecl>(D.getDecl());
27442754
// If emitting for SYCL device, emit the deferred alias
27452755
// as well as what it aliases.
@@ -3374,11 +3384,17 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
33743384
// size and host-side address in order to provide access to
33753385
// their device-side incarnations.
33763386

3377-
// So device-only functions are the only things we skip.
3387+
// So device-only functions are the only things we skip, except for SYCL.
33783388
if (isa<FunctionDecl>(Global) && !Global->hasAttr<CUDAHostAttr>() &&
3379-
Global->hasAttr<CUDADeviceAttr>())
3389+
Global->hasAttr<CUDADeviceAttr>()) {
3390+
// In SYCL, every (CUDA) __device__ function needs to have a __host__
3391+
// counterpart that will be emitted in case of it is not already
3392+
// present.
3393+
if (LangOpts.SYCLIsHost && MustBeEmitted(Global) &&
3394+
MayBeEmittedEagerly(Global))
3395+
addDeferredDeclToEmit(GD);
33803396
return;
3381-
3397+
}
33823398
assert((isa<FunctionDecl>(Global) || isa<VarDecl>(Global)) &&
33833399
"Expected Variable or Function");
33843400
}

0 commit comments

Comments
 (0)