Skip to content

Commit 255a774

Browse files
authored
[Driver][SYCL] Bound architecture mismatch with multiple targets (#15501)
When using -fsycl-targets to specify the targets to offload to, the user can pass multiple targets. When those targets have associated architectures, each architecture should be applied only to its own toolchain/target. Specifying a setting like -fsycl-targets=nvptx64,spir64_gen was setting the wrong device architecture for the spir64_gen compilation: the bound architecture associated with nvptx64 (in this case sm_50) was being pushed to the spir64_gen target, causing the wrong device value to be used for the AOT compilation. Fix this by correcting the logic that assigns the bound architecture to a given triple. The previous logic did not take non-spir64_gen targets into account, so it assigned the wrong arch when spir64_gen was encountered after the nvptx64 target.
1 parent 17e8f16 commit 255a774

File tree

3 files changed

+77
-4
lines changed

3 files changed

+77
-4
lines changed

clang/lib/Driver/Driver.cpp

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6350,7 +6350,7 @@ class OffloadingActionBuilder final {
63506350
if (GpuInitHasErrors)
63516351
return true;
63526352

6353-
int I = 0;
6353+
int GenIndex = 0;
63546354
// Fill SYCLTargetInfoList
63556355
for (auto &TT : SYCLTripleList) {
63566356
auto TCIt = llvm::find_if(
@@ -6363,10 +6363,21 @@ class OffloadingActionBuilder final {
63636363
// is the target device.
63646364
if (TT.isSPIR() &&
63656365
TT.getSubArch() == llvm::Triple::SPIRSubArch_gen) {
6366-
StringRef Device(GpuArchList[I].second);
6366+
// Multiple spir64_gen targets are allowed to be used via the
6367+
// -fsycl-targets=spir64_gen and -fsycl-targets=intel_gpu_*
6368+
// specifiers. Using an index through the known GpuArchList
6369+
// values, increment through them accordingly to allow for
6370+
// the multiple settings as well as preventing re-use.
6371+
while (TT != GpuArchList[GenIndex].first &&
6372+
GenIndex < GpuArchList.size())
6373+
++GenIndex;
6374+
if (GpuArchList[GenIndex].first != TT)
6375+
// No match.
6376+
continue;
6377+
StringRef Device(GpuArchList[GenIndex].second);
63676378
SYCLTargetInfoList.emplace_back(
63686379
*TCIt, Device.empty() ? nullptr : Device.data());
6369-
++I;
6380+
++GenIndex;
63706381
continue;
63716382
}
63726383
SYCLTargetInfoList.emplace_back(*TCIt, nullptr);
@@ -6380,7 +6391,6 @@ class OffloadingActionBuilder final {
63806391
}
63816392
assert(OffloadArch && "Failed to find matching arch.");
63826393
SYCLTargetInfoList.emplace_back(*TCIt, OffloadArch);
6383-
++I;
63846394
}
63856395
}
63866396
}

clang/test/Driver/sycl-offload-old-model.c

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -622,6 +622,44 @@
622622
// CHK-PHASE-MULTI-TARG-BOUND-ARCH: 28: offload, "device-sycl (spir64-unknown-unknown)" {27}, object
623623
// CHK-PHASE-MULTI-TARG-BOUND-ARCH: 29: linker, {8, 21, 28}, image, (host-sycl)
624624

625+
// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl \
626+
// RUN: -fno-sycl-instrument-device-code -fno-sycl-device-lib=all \
627+
// RUN: -fsycl-targets=nvptx64-nvidia-cuda,spir64_gen \
628+
// RUN: -Xsycl-target-backend=spir64_gen "-device skl" \
629+
// RUN: -ccc-print-phases %s 2>&1 \
630+
// RUN: | FileCheck -check-prefix=CHK-PHASE-MULTI-TARG-BOUND-ARCH2 %s
631+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 0: input, "[[INPUT:.+\.c]]", c++, (host-sycl)
632+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 1: preprocessor, {0}, c++-cpp-output, (host-sycl)
633+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 2: input, "[[INPUT]]", c++, (device-sycl)
634+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 3: preprocessor, {2}, c++-cpp-output, (device-sycl)
635+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 4: compiler, {3}, ir, (device-sycl)
636+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 5: offload, "host-sycl (x86_64-unknown-linux-gnu)" {1}, "device-sycl (spir64_gen-unknown-unknown)" {4}, c++-cpp-output
637+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 6: compiler, {5}, ir, (host-sycl)
638+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 7: backend, {6}, assembler, (host-sycl)
639+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 8: assembler, {7}, object, (host-sycl)
640+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 9: input, "[[INPUT]]", c++, (device-sycl, sm_50)
641+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 10: preprocessor, {9}, c++-cpp-output, (device-sycl, sm_50)
642+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 11: compiler, {10}, ir, (device-sycl, sm_50)
643+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 12: linker, {11}, ir, (device-sycl, sm_50)
644+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 13: sycl-post-link, {12}, ir, (device-sycl, sm_50)
645+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 14: file-table-tform, {13}, ir, (device-sycl, sm_50)
646+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 15: backend, {14}, assembler, (device-sycl, sm_50)
647+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 16: assembler, {15}, object, (device-sycl, sm_50)
648+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 17: linker, {15, 16}, cuda-fatbin, (device-sycl, sm_50)
649+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 18: foreach, {14, 17}, cuda-fatbin, (device-sycl, sm_50)
650+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 19: file-table-tform, {13, 18}, tempfiletable, (device-sycl, sm_50)
651+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 20: clang-offload-wrapper, {19}, object, (device-sycl, sm_50)
652+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 21: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {20}, object
653+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 22: linker, {4}, ir, (device-sycl)
654+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 23: sycl-post-link, {22}, tempfiletable, (device-sycl)
655+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 24: file-table-tform, {23}, tempfilelist, (device-sycl)
656+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 25: llvm-spirv, {24}, tempfilelist, (device-sycl)
657+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 26: backend-compiler, {25}, image, (device-sycl)
658+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 27: file-table-tform, {23, 26}, tempfiletable, (device-sycl)
659+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 28: clang-offload-wrapper, {27}, object, (device-sycl)
660+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 29: offload, "device-sycl (spir64_gen-unknown-unknown)" {28}, object
661+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 30: linker, {8, 21, 29}, image, (host-sycl)
662+
625663
/// Check the behaviour however with swapped -fsycl-targets
626664
// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fno-sycl-instrument-device-code -fno-sycl-device-lib=all -fsycl-targets=spir64,nvptx64-nvidia-cuda -ccc-print-phases %s 2>&1 \
627665
// RUN: | FileCheck -check-prefix=CHK-PHASE-MULTI-TARG-BOUND-ARCH-FLIPPED %s

clang/test/Driver/sycl-offload.c

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,31 @@
394394
// CHK-PHASE-MULTI-TARG-BOUND-ARCH: 16: assembler, {15}, object, (host-sycl)
395395
// CHK-PHASE-MULTI-TARG-BOUND-ARCH: 17: clang-linker-wrapper, {16}, image, (host-sycl)
396396

397+
// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --offload-new-driver \
398+
// RUN: -fno-sycl-instrument-device-code -fno-sycl-device-lib=all \
399+
// RUN: -fsycl-targets=nvptx64-nvidia-cuda,spir64_gen \
400+
// RUN: -Xsycl-target-backend=spir64_gen "-device skl" \
401+
// RUN: -ccc-print-phases %s 2>&1 \
402+
// RUN: | FileCheck -check-prefix=CHK-PHASE-MULTI-TARG-BOUND-ARCH2 %s
403+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 0: input, "[[INPUT:.+\.c]]", c++, (host-sycl)
404+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 1: preprocessor, {0}, c++-cpp-output, (host-sycl)
405+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 2: compiler, {1}, ir, (host-sycl)
406+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 3: input, "[[INPUT]]", c++, (device-sycl, skl)
407+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 4: preprocessor, {3}, c++-cpp-output, (device-sycl, skl)
408+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 5: compiler, {4}, ir, (device-sycl, skl)
409+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 6: backend, {5}, ir, (device-sycl, skl)
410+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 7: offload, "device-sycl (spir64_gen-unknown-unknown:skl)" {6}, ir
411+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 8: input, "[[INPUT]]", c++, (device-sycl, sm_50)
412+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 9: preprocessor, {8}, c++-cpp-output, (device-sycl, sm_50)
413+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 10: compiler, {9}, ir, (device-sycl, sm_50)
414+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 11: backend, {10}, ir, (device-sycl, sm_50)
415+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 12: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {11}, ir
416+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 13: clang-offload-packager, {7, 12}, image, (device-sycl)
417+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 14: offload, "host-sycl (x86_64-unknown-linux-gnu)" {2}, "device-sycl (x86_64-unknown-linux-gnu)" {13}, ir
418+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 15: backend, {14}, assembler, (host-sycl)
419+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 16: assembler, {15}, object, (host-sycl)
420+
// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 17: clang-linker-wrapper, {16}, image, (host-sycl)
421+
397422
/// ###########################################################################
398423

399424
// Check if valid bound arch behaviour occurs when compiling for spir-v,nvidia-gpu, and amd-gpu

0 commit comments

Comments
 (0)