Skip to content

Commit a8c2770

Browse files
committed
[NVPTX] Fix poorly designed assertion introduced in D120129
NVPTXTargetLowering::getFunctionParamOptimizedAlign, which was introduced in D120129, contained a poorly designed assertion checking that a function with internal or private linkage is not a kernel. It relied on invariants that were not actually guaranteed, and that resulted in a compiler crash with some CUDA versions (see discussion with @jdoerfert in D120129). This patch changes that assertion and makes it use isKernelFunction, which is designed exactly for such checks. This patch also includes a test with the IR that previously caused the compiler crash. Differential Revision: https://reviews.llvm.org/D122562
1 parent 774818c commit a8c2770

File tree

2 files changed

+38
-33
lines changed

2 files changed

+38
-33
lines changed

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 4 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -4260,40 +4260,11 @@ Align NVPTXTargetLowering::getFunctionParamOptimizedAlign(
42604260

42614261
// If a function has linkage different from internal or private, we
42624262
// must use default ABI alignment as external users rely on it.
4263-
switch (F->getLinkage()) {
4264-
case GlobalValue::InternalLinkage:
4265-
case GlobalValue::PrivateLinkage: {
4266-
// Check that if a function has internal or private linkage
4267-
// it is not a kernel.
4268-
#ifndef NDEBUG
4269-
const NamedMDNode *NMDN =
4270-
F->getParent()->getNamedMetadata("nvvm.annotations");
4271-
if (NMDN) {
4272-
for (const MDNode *MDN : NMDN->operands()) {
4273-
assert(MDN->getNumOperands() == 3);
4274-
4275-
const Metadata *MD0 = MDN->getOperand(0).get();
4276-
const auto *MDV0 = cast<ConstantAsMetadata>(MD0)->getValue();
4277-
const auto *MDFn = cast<Function>(MDV0);
4278-
if (MDFn != F)
4279-
continue;
4280-
4281-
const Metadata *MD1 = MDN->getOperand(1).get();
4282-
const MDString *MDStr = cast<MDString>(MD1);
4283-
if (MDStr->getString() != "kernel")
4284-
continue;
4285-
4286-
const Metadata *MD2 = MDN->getOperand(2).get();
4287-
const auto *MDV2 = cast<ConstantAsMetadata>(MD2)->getValue();
4288-
assert(!cast<ConstantInt>(MDV2)->isZero());
4289-
}
4290-
}
4291-
#endif
4292-
return Align(std::max(uint64_t(16), ABITypeAlign));
4293-
}
4294-
default:
4263+
if (!F->hasLocalLinkage())
42954264
return Align(ABITypeAlign);
4296-
}
4265+
4266+
assert(!isKernelFunction(*F) && "Expect kernels to have non-local linkage");
4267+
return Align(std::max(uint64_t(16), ABITypeAlign));
42974268
}
42984269

42994270
/// isLegalAddressingMode - Return true if the addressing mode represented
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
; RUN: llc < %s -mtriple=nvptx-unknown-unknown | FileCheck %s
2+
;
3+
; NVPTXTargetLowering::getFunctionParamOptimizedAlign, which was introduced in
4+
; D120129, contained a poorly designed assertion checking that a function with
5+
; internal or private linkage is not a kernel. It relied on invariants that
6+
; were not actually guaranteed, and that resulted in compiler crash with some
7+
; CUDA versions (see discussion with @jdoerfert in D120129). This test contains
8+
; metadata that caused compiler crash and a function with internal linkage
9+
; whose purpose is to let the compiler run on the path where the crash happened.
10+
; Metadata was obtained from libdevice.10.bc shipped with cuda-11-0.
11+
12+
13+
define internal i32 @foo() {
14+
; CHECK-LABEL: .func (.param .b32 func_retval0) foo()
15+
ret i32 42
16+
}
17+
18+
define i32 @bar() {
19+
; CHECK-LABEL: .visible .func (.param .b32 func_retval0) bar()
20+
%x = call i32 @foo()
21+
ret i32 %x
22+
}
23+
24+
!nvvmir.version = !{!0}
25+
!nvvm.annotations = !{!1, !2, !1, !3, !3, !3, !3, !4, !4, !3}
26+
27+
!0 = !{i32 1, i32 4}
28+
!1 = !{null, !"align", i32 8}
29+
!2 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080}
30+
!3 = !{null, !"align", i32 16}
31+
!4 = !{null, !"align", i32 16, !"align", i32 65552, !"align", i32 131088}
32+
!5 = distinct !{!5, !6}
33+
!6 = !{!"llvm.loop.unroll.count", i32 1}
34+
!7 = distinct !{!7, !6}

0 commit comments

Comments
 (0)