Skip to content

Commit e5de913

Browse files
authored
[SYCL][CUDA][HIP] Implement support for AMD and NVIDIA architectures as argument to fsycl-targets (#7348)
* Update if_architecture_is extension to include NVIDIA and AMD architectures. * Move the if_architecture_is header file from intel to oneapi. * Update experimental/sycl_ext_intel_device_architecture.asciidoc with the contents from proposed/sycl_ext_oneapi_device_architecture.asciidoc. * Rename sycl_ext_intel_device_architecture.asciidoc to sycl_ext_oneapi_device_architecture.asciidoc. * Delete proposed/sycl_ext_oneapi_device_architecture.asciidoc. * Rename nvidia_gpu_smxx to nvidia_gpu_sm_xx. * Remove NVIDIA architectures not supported by DPC++ (sm_20 to sm_37).
1 parent ed3d35c commit e5de913

File tree

13 files changed

+1182
-1049
lines changed

13 files changed

+1182
-1049
lines changed

clang/lib/Driver/Driver.cpp

Lines changed: 36 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -830,19 +830,6 @@ static bool addSYCLDefaultTriple(Compilation &C,
830830
return true;
831831
}
832832

833-
// Prefix for Intel GPU specific targets used for -fsycl-targets
834-
constexpr char IntelGPU[] = "intel_gpu_";
835-
836-
static llvm::Optional<StringRef> isIntelGPUTarget(StringRef Target) {
837-
// Handle target specifications that resemble 'intel_gpu_*' here. These are
838-
// 'spir64_gen' based.
839-
if (Target.startswith(IntelGPU)) {
840-
return tools::SYCL::gen::resolveGenDevice(
841-
Target.drop_front(sizeof(IntelGPU) - 1));
842-
}
843-
return llvm::None;
844-
}
845-
846833
void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
847834
InputList &Inputs) {
848835

@@ -851,6 +838,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
851838
//
852839
// We need to generate a CUDA/HIP toolchain if any of the inputs has a CUDA
853840
// or HIP type. However, mixed CUDA/HIP compilation is not supported.
841+
using namespace tools::SYCL;
854842
bool IsCuda =
855843
llvm::any_of(Inputs, [](std::pair<types::ID, const llvm::opt::Arg *> &I) {
856844
return types::isCuda(I.first);
@@ -1128,12 +1116,24 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
11281116

11291117
for (StringRef Val : SYCLTargetsValues->getValues()) {
11301118
StringRef UserTargetName(Val);
1131-
if (auto Device = isIntelGPUTarget(Val)) {
1119+
if (auto Device = gen::isGPUTarget<gen::IntelGPU>(Val)) {
11321120
if (Device->empty()) {
11331121
Diag(clang::diag::err_drv_invalid_sycl_target) << Val;
11341122
continue;
11351123
}
11361124
UserTargetName = "spir64_gen";
1125+
} else if (auto Device = gen::isGPUTarget<gen::NvidiaGPU>(Val)) {
1126+
if (Device->empty()) {
1127+
Diag(clang::diag::err_drv_invalid_sycl_target) << Val;
1128+
continue;
1129+
}
1130+
UserTargetName = "nvptx64-nvidia-cuda";
1131+
} else if (auto Device = gen::isGPUTarget<gen::AmdGPU>(Val)) {
1132+
if (Device->empty()) {
1133+
Diag(clang::diag::err_drv_invalid_sycl_target) << Val;
1134+
continue;
1135+
}
1136+
UserTargetName = "amdgcn-amd-amdhsa";
11371137
}
11381138

11391139
if (!isValidSYCLTriple(MakeSYCLDeviceTriple(UserTargetName))) {
@@ -5802,6 +5802,7 @@ class OffloadingActionBuilder final {
58025802
}
58035803

58045804
bool initialize() override {
5805+
using namespace tools::SYCL;
58055806
// Get the SYCL toolchains. If we don't get any, the action builder will
58065807
// know there is nothing to do related to SYCL offloading.
58075808
auto SYCLTCRange = C.getOffloadToolChains<Action::OFK_SYCL>();
@@ -5841,15 +5842,35 @@ class OffloadingActionBuilder final {
58415842
llvm::StringMap<StringRef> FoundNormalizedTriples;
58425843
for (StringRef Val : SYCLTargetsValues->getValues()) {
58435844
StringRef UserTargetName(Val);
5844-
if (auto ValidDevice = isIntelGPUTarget(Val)) {
5845+
if (auto ValidDevice = gen::isGPUTarget<gen::IntelGPU>(Val)) {
58455846
if (ValidDevice->empty())
58465847
// Unrecognized, we have already diagnosed this earlier; skip.
58475848
continue;
58485849
// Add the proper -device value to the list.
58495850
GpuArchList.emplace_back(C.getDriver().MakeSYCLDeviceTriple(
58505851
"spir64_gen"), ValidDevice->data());
58515852
UserTargetName = "spir64_gen";
5853+
} else if (auto ValidDevice =
5854+
gen::isGPUTarget<gen::NvidiaGPU>(Val)) {
5855+
if (ValidDevice->empty())
5856+
// Unrecognized, we have already diagnosed this earlier; skip.
5857+
continue;
5858+
// Add the proper -device value to the list.
5859+
GpuArchList.emplace_back(
5860+
C.getDriver().MakeSYCLDeviceTriple("nvptx64-nvidia-cuda"),
5861+
ValidDevice->data());
5862+
UserTargetName = "nvptx64-nvidia-cuda";
5863+
} else if (auto ValidDevice = gen::isGPUTarget<gen::AmdGPU>(Val)) {
5864+
if (ValidDevice->empty())
5865+
// Unrecognized, we have already diagnosed this earlier; skip.
5866+
continue;
5867+
// Add the proper -device value to the list.
5868+
GpuArchList.emplace_back(
5869+
C.getDriver().MakeSYCLDeviceTriple("amdgcn-amd-amdhsa"),
5870+
ValidDevice->data());
5871+
UserTargetName = "amdgcn-amd-amdhsa";
58525872
}
5873+
58535874
llvm::Triple TT(C.getDriver().MakeSYCLDeviceTriple(Val));
58545875
std::string NormalizedName = TT.normalize();
58555876

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5090,10 +5090,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
50905090
// between device and host where we should be able to use the offloading
50915091
// arch to add the macro to the host compile.
50925092
auto addTargetMacros = [&](const llvm::Triple &Triple) {
5093-
if (!Triple.isSPIR())
5093+
if (!Triple.isSPIR() && !Triple.isNVPTX() && !Triple.isAMDGCN())
50945094
return;
50955095
SmallString<64> Macro;
5096-
if (Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen) {
5096+
if ((Triple.isSPIR() &&
5097+
Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen) ||
5098+
Triple.isNVPTX() || Triple.isAMDGCN()) {
50975099
StringRef Device = JA.getOffloadingArch();
50985100
if (!Device.empty()) {
50995101
Macro = "-D";

clang/lib/Driver/ToolChains/SYCL.cpp

Lines changed: 136 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -597,60 +597,130 @@ void SYCL::gen::BackendCompiler::ConstructJob(Compilation &C,
597597
StringRef SYCL::gen::resolveGenDevice(StringRef DeviceName) {
598598
StringRef Device;
599599
Device = llvm::StringSwitch<StringRef>(DeviceName)
600-
.Cases("bdw", "8_0_0", "bdw")
601-
.Cases("skl", "9_0_9", "skl")
602-
.Cases("kbl", "9_1_9", "kbl")
603-
.Cases("cfl", "9_2_9", "cfl")
604-
.Cases("apl", "9_3_0", "apl")
605-
.Cases("glk", "9_4_0", "glk")
606-
.Cases("whl", "9_5_0", "whl")
607-
.Cases("aml", "9_6_0", "aml")
608-
.Cases("cml", "9_7_0", "cml")
609-
.Cases("icllp", "11_0_0", "icllp")
610-
.Cases("ehl", "11_2_0", "ehl")
611-
.Cases("tgllp", "12_0_0", "tgllp")
612-
.Case("rkl", "rkl")
613-
.Case("adl_s", "adl_s")
614-
.Case("rpl_s", "rpl_s")
615-
.Case("adl_p", "adl_p")
616-
.Case("adl_n", "adl_n")
617-
.Cases("dg1", "12_10_0", "dg1")
618-
.Case("acm_g10", "acm_g10")
619-
.Case("acm_g11", "acm_g11")
620-
.Case("acm_g12", "acm_g12")
621-
.Case("pvc", "pvc")
600+
.Cases("intel_gpu_bdw", "intel_gpu_8_0_0", "bdw")
601+
.Cases("intel_gpu_skl", "intel_gpu_9_0_9", "skl")
602+
.Cases("intel_gpu_kbl", "intel_gpu_9_1_9", "kbl")
603+
.Cases("intel_gpu_cfl", "intel_gpu_9_2_9", "cfl")
604+
.Cases("intel_gpu_apl", "intel_gpu_9_3_0", "apl")
605+
.Cases("intel_gpu_glk", "intel_gpu_9_4_0", "glk")
606+
.Cases("intel_gpu_whl", "intel_gpu_9_5_0", "whl")
607+
.Cases("intel_gpu_aml", "intel_gpu_9_6_0", "aml")
608+
.Cases("intel_gpu_cml", "intel_gpu_9_7_0", "cml")
609+
.Cases("intel_gpu_icllp", "intel_gpu_11_0_0", "icllp")
610+
.Cases("intel_gpu_ehl", "intel_gpu_11_2_0", "ehl")
611+
.Cases("intel_gpu_tgllp", "intel_gpu_12_0_0", "tgllp")
612+
.Case("intel_gpu_rkl", "rkl")
613+
.Case("intel_gpu_adl_s", "adl_s")
614+
.Case("intel_gpu_rpl_s", "rpl_s")
615+
.Case("intel_gpu_adl_p", "adl_p")
616+
.Case("intel_gpu_adl_n", "adl_n")
617+
.Cases("intel_gpu_dg1", "intel_gpu_12_10_0", "dg1")
618+
.Case("intel_gpu_acm_g10", "acm_g10")
619+
.Case("intel_gpu_acm_g11", "acm_g11")
620+
.Case("intel_gpu_acm_g12", "acm_g12")
621+
.Case("intel_gpu_pvc", "pvc")
622+
.Case("nvidia_gpu_sm_50", "sm_50")
623+
.Case("nvidia_gpu_sm_52", "sm_52")
624+
.Case("nvidia_gpu_sm_53", "sm_53")
625+
.Case("nvidia_gpu_sm_60", "sm_60")
626+
.Case("nvidia_gpu_sm_61", "sm_61")
627+
.Case("nvidia_gpu_sm_62", "sm_62")
628+
.Case("nvidia_gpu_sm_70", "sm_70")
629+
.Case("nvidia_gpu_sm_72", "sm_72")
630+
.Case("nvidia_gpu_sm_75", "sm_75")
631+
.Case("nvidia_gpu_sm_80", "sm_80")
632+
.Case("nvidia_gpu_sm_86", "sm_86")
633+
.Case("nvidia_gpu_sm_87", "sm_87")
634+
.Case("nvidia_gpu_sm_89", "sm_89")
635+
.Case("nvidia_gpu_sm_90", "sm_90")
636+
.Case("amd_gpu_gfx700", "gfx700")
637+
.Case("amd_gpu_gfx701", "gfx701")
638+
.Case("amd_gpu_gfx702", "gfx702")
639+
.Case("amd_gpu_gfx801", "gfx801")
640+
.Case("amd_gpu_gfx802", "gfx802")
641+
.Case("amd_gpu_gfx803", "gfx803")
642+
.Case("amd_gpu_gfx805", "gfx805")
643+
.Case("amd_gpu_gfx810", "gfx810")
644+
.Case("amd_gpu_gfx900", "gfx900")
645+
.Case("amd_gpu_gfx902", "gfx902")
646+
.Case("amd_gpu_gfx904", "gfx904")
647+
.Case("amd_gpu_gfx906", "gfx906")
648+
.Case("amd_gpu_gfx908", "gfx908")
649+
.Case("amd_gpu_gfx90a", "gfx90a")
650+
.Case("amd_gpu_gfx1010", "gfx1010")
651+
.Case("amd_gpu_gfx1011", "gfx1011")
652+
.Case("amd_gpu_gfx1012", "gfx1012")
653+
.Case("amd_gpu_gfx1013", "gfx1013")
654+
.Case("amd_gpu_gfx1030", "gfx1030")
655+
.Case("amd_gpu_gfx1031", "gfx1031")
656+
.Case("amd_gpu_gfx1032", "gfx1032")
622657
.Default("");
623658
return Device;
624659
}
625660

626-
StringRef SYCL::gen::getGenDeviceMacro(StringRef DeviceName) {
661+
SmallString<64> SYCL::gen::getGenDeviceMacro(StringRef DeviceName) {
627662
SmallString<64> Macro;
628663
StringRef Ext = llvm::StringSwitch<StringRef>(DeviceName)
629-
.Case("bdw", "BDW")
630-
.Case("skl", "SKL")
631-
.Case("kbl", "KBL")
632-
.Case("cfl", "CFL")
633-
.Case("apl", "APL")
634-
.Case("glk", "GLK")
635-
.Case("whl", "WHL")
636-
.Case("aml", "AML")
637-
.Case("cml", "CML")
638-
.Case("icllp", "ICLLP")
639-
.Case("ehl", "EHL")
640-
.Case("tgllp", "TGLLP")
641-
.Case("rkl", "RKL")
642-
.Case("adl_s", "ADL_S")
643-
.Case("rpl_s", "RPL_S")
644-
.Case("adl_p", "ADL_P")
645-
.Case("adl_n", "ADL_N")
646-
.Case("dg1", "DG1")
647-
.Case("acm_g10", "ACM_G10")
648-
.Case("acm_g11", "ACM_G11")
649-
.Case("acm_g12", "ACM_G12")
650-
.Case("pvc", "PVC")
664+
.Case("bdw", "INTEL_GPU_BDW")
665+
.Case("skl", "INTEL_GPU_SKL")
666+
.Case("kbl", "INTEL_GPU_KBL")
667+
.Case("cfl", "INTEL_GPU_CFL")
668+
.Case("apl", "INTEL_GPU_APL")
669+
.Case("glk", "INTEL_GPU_GLK")
670+
.Case("whl", "INTEL_GPU_WHL")
671+
.Case("aml", "INTEL_GPU_AML")
672+
.Case("cml", "INTEL_GPU_CML")
673+
.Case("icllp", "INTEL_GPU_ICLLP")
674+
.Case("ehl", "INTEL_GPU_EHL")
675+
.Case("tgllp", "INTEL_GPU_TGLLP")
676+
.Case("rkl", "INTEL_GPU_RKL")
677+
.Case("adl_s", "INTEL_GPU_ADL_S")
678+
.Case("rpl_s", "INTEL_GPU_RPL_S")
679+
.Case("adl_p", "INTEL_GPU_ADL_P")
680+
.Case("adl_n", "INTEL_GPU_ADL_N")
681+
.Case("dg1", "INTEL_GPU_DG1")
682+
.Case("acm_g10", "INTEL_GPU_ACM_G10")
683+
.Case("acm_g11", "INTEL_GPU_ACM_G11")
684+
.Case("acm_g12", "INTEL_GPU_ACM_G12")
685+
.Case("pvc", "INTEL_GPU_PVC")
686+
.Case("sm_50", "NVIDIA_GPU_SM_50")
687+
.Case("sm_52", "NVIDIA_GPU_SM_52")
688+
.Case("sm_53", "NVIDIA_GPU_SM_53")
689+
.Case("sm_60", "NVIDIA_GPU_SM_60")
690+
.Case("sm_61", "NVIDIA_GPU_SM_61")
691+
.Case("sm_62", "NVIDIA_GPU_SM_62")
692+
.Case("sm_70", "NVIDIA_GPU_SM_70")
693+
.Case("sm_72", "NVIDIA_GPU_SM_72")
694+
.Case("sm_75", "NVIDIA_GPU_SM_75")
695+
.Case("sm_80", "NVIDIA_GPU_SM_80")
696+
.Case("sm_86", "NVIDIA_GPU_SM_86")
697+
.Case("sm_87", "NVIDIA_GPU_SM_87")
698+
.Case("sm_89", "NVIDIA_GPU_SM_89")
699+
.Case("sm_90", "NVIDIA_GPU_SM_90")
700+
.Case("gfx700", "AMD_GPU_GFX700")
701+
.Case("gfx701", "AMD_GPU_GFX701")
702+
.Case("gfx702", "AMD_GPU_GFX702")
703+
.Case("gfx801", "AMD_GPU_GFX801")
704+
.Case("gfx802", "AMD_GPU_GFX802")
705+
.Case("gfx803", "AMD_GPU_GFX803")
706+
.Case("gfx805", "AMD_GPU_GFX805")
707+
.Case("gfx810", "AMD_GPU_GFX810")
708+
.Case("gfx900", "AMD_GPU_GFX900")
709+
.Case("gfx902", "AMD_GPU_GFX902")
710+
.Case("gfx904", "AMD_GPU_GFX904")
711+
.Case("gfx906", "AMD_GPU_GFX906")
712+
.Case("gfx908", "AMD_GPU_GFX908")
713+
.Case("gfx90a", "AMD_GPU_GFX90A")
714+
.Case("gfx1010", "AMD_GPU_GFX1010")
715+
.Case("gfx1011", "AMD_GPU_GFX1011")
716+
.Case("gfx1012", "AMD_GPU_GFX1012")
717+
.Case("gfx1013", "AMD_GPU_GFX1013")
718+
.Case("gfx1030", "AMD_GPU_GFX1030")
719+
.Case("gfx1031", "AMD_GPU_GFX1031")
720+
.Case("gfx1032", "AMD_GPU_GFX1032")
651721
.Default("");
652722
if (!Ext.empty()) {
653-
Macro = "__SYCL_TARGET_INTEL_GPU_";
723+
Macro = "__SYCL_TARGET_";
654724
Macro += Ext;
655725
Macro += "__";
656726
}
@@ -760,6 +830,25 @@ static void parseTargetOpts(StringRef ArgString, const llvm::opt::ArgList &Args,
760830
CmdArgs.push_back(Args.MakeArgString(TA));
761831
}
762832

833+
void SYCLToolChain::TranslateGPUTargetOpt(const llvm::opt::ArgList &Args,
834+
llvm::opt::ArgStringList &CmdArgs,
835+
OptSpecifier Opt_EQ) const {
836+
for (auto *A : Args) {
837+
if (A->getOption().matches(Opt_EQ)) {
838+
if (auto GpuDevice =
839+
tools::SYCL::gen::isGPUTarget<tools::SYCL::gen::AmdGPU>(
840+
A->getValue())) {
841+
StringRef ArgString;
842+
SmallString<64> OffloadArch("--offload-arch=");
843+
OffloadArch += GpuDevice->data();
844+
ArgString = OffloadArch;
845+
parseTargetOpts(ArgString, Args, CmdArgs);
846+
A->claim();
847+
}
848+
}
849+
}
850+
}
851+
763852
// Expects a specific type of option (e.g. -Xsycl-target-backend) and will
764853
// extract the arguments.
765854
void SYCLToolChain::TranslateTargetOpt(const llvm::opt::ArgList &Args,
@@ -915,6 +1004,7 @@ void SYCLToolChain::TranslateBackendTargetArgs(
9151004
// Handle -Xsycl-target-backend.
9161005
TranslateTargetOpt(Args, CmdArgs, options::OPT_Xsycl_backend,
9171006
options::OPT_Xsycl_backend_EQ, Device);
1007+
TranslateGPUTargetOpt(Args, CmdArgs, options::OPT_fsycl_targets_EQ);
9181008
}
9191009

9201010
void SYCLToolChain::TranslateLinkerTargetArgs(

clang/lib/Driver/ToolChains/SYCL.h

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,21 @@ class LLVM_LIBRARY_VISIBILITY BackendCompiler : public Tool {
106106
};
107107

108108
StringRef resolveGenDevice(StringRef DeviceName);
109-
StringRef getGenDeviceMacro(StringRef DeviceName);
109+
SmallString<64> getGenDeviceMacro(StringRef DeviceName);
110+
111+
// Prefix for GPU specific targets used for -fsycl-targets
112+
constexpr char IntelGPU[] = "intel_gpu_";
113+
constexpr char NvidiaGPU[] = "nvidia_gpu_";
114+
constexpr char AmdGPU[] = "amd_gpu_";
115+
116+
template <auto GPUArh> llvm::Optional<StringRef> isGPUTarget(StringRef Target) {
117+
// Handle target specifications that resemble '(intel, nvidia, amd)_gpu_*'
118+
// here.
119+
if (Target.startswith(GPUArh)) {
120+
return resolveGenDevice(Target);
121+
}
122+
return llvm::None;
123+
}
110124

111125
} // end namespace gen
112126

@@ -189,6 +203,9 @@ class LLVM_LIBRARY_VISIBILITY SYCLToolChain : public ToolChain {
189203
llvm::opt::OptSpecifier Opt,
190204
llvm::opt::OptSpecifier Opt_EQ,
191205
StringRef Device) const;
206+
void TranslateGPUTargetOpt(const llvm::opt::ArgList &Args,
207+
llvm::opt::ArgStringList &CmdArgs,
208+
llvm::opt::OptSpecifier Opt_EQ) const;
192209
};
193210

194211
} // end namespace toolchains

0 commit comments

Comments
 (0)