Skip to content

Commit 9d570d5

Browse files
wenju-he and nikic authored
[ValueTracking] Return true for AddrSpaceCast in canCreateUndefOrPoison (#144686)
In our downstream GPU target, the following IR is valid before instcombine, although the second addrspacecast causes UB:

    define i1 @test(ptr addrspace(1) noundef %v) {
      %0 = addrspacecast ptr addrspace(1) %v to ptr addrspace(4)
      %1 = call i32 @llvm.xxxx.isaddr.shared(ptr addrspace(4) %0)
      %2 = icmp eq i32 %1, 0
      %3 = addrspacecast ptr addrspace(4) %0 to ptr addrspace(3)
      %4 = select i1 %2, ptr addrspace(3) null, ptr addrspace(3) %3
      %5 = icmp eq ptr addrspace(3) %4, null
      ret i1 %5
    }

We have a custom optimization that replaces an invalid addrspacecast with poison, and the IR is still valid since `select` stops poison propagation. However, the instcombine pass optimizes the `select` to an `or`:

      %0 = addrspacecast ptr addrspace(1) %v to ptr addrspace(4)
      %1 = call i32 @llvm.xxxx.isaddr.shared(ptr addrspace(4) %0)
      %2 = icmp eq i32 %1, 0
      %3 = addrspacecast ptr addrspace(1) %v to ptr addrspace(3)
      %4 = icmp eq ptr addrspace(3) %3, null
      %5 = or i1 %2, %4
      ret i1 %5

The transform is invalid for our target.

---------

Co-authored-by: Nikita Popov <github@npopov.com>
1 parent a314ac4 commit 9d570d5

File tree

6 files changed

+33
-3
lines changed

6 files changed

+33
-3
lines changed

clang/test/CodeGenOpenCL/amdgcn-buffer-rsrc-type.cl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ void consumeBufferPtr(__amdgpu_buffer_rsrc_t *p) {
4444
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A]], align 16, !tbaa [[TBAA8:![0-9]+]]
4545
// CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
4646
// CHECK-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq ptr addrspace(5) [[A]], addrspacecast (ptr null to ptr addrspace(5))
47-
// CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[TOBOOL_NOT_I]], [[TOBOOL_NOT]]
47+
// CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TOBOOL_NOT]], i1 true, i1 [[TOBOOL_NOT_I]]
4848
// CHECK-NEXT: br i1 [[OR_COND]], label [[IF_END:%.*]], label [[IF_THEN_I:%.*]]
4949
// CHECK: if.then.i:
5050
// CHECK-NEXT: [[R:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(5) [[A]], i32 16

clang/test/CodeGenOpenCL/as_type.cl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ int3 f8(char16 x) {
6767
return __builtin_astype(x, int3);
6868
}
6969

70-
//CHECK: define{{.*}} spir_func noundef ptr addrspace(1) @addr_cast(ptr noundef readnone captures(ret: address, provenance) %[[x:.*]])
70+
//CHECK: define{{.*}} spir_func ptr addrspace(1) @addr_cast(ptr noundef readnone captures(ret: address, provenance) %[[x:.*]])
7171
//CHECK: %[[cast:.*]] ={{.*}} addrspacecast ptr %[[x]] to ptr addrspace(1)
7272
//CHECK: ret ptr addrspace(1) %[[cast]]
7373
global int* addr_cast(int *x) {

llvm/docs/LangRef.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12627,6 +12627,9 @@ result pointer is dereferenceable, the cast is assumed to be
1262712627
reversible (i.e. casting the result back to the original address space
1262812628
should yield the original bit pattern).
1262912629

12630+
Which address space casts are supported depends on the target. Unsupported
12631+
address space casts return :ref:`poison <poisonvalues>`.
12632+
1263012633
Example:
1263112634
""""""""
1263212635

llvm/lib/Analysis/ValueTracking.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7486,6 +7486,8 @@ static bool canCreateUndefOrPoison(const Operator *Op, UndefPoisonKind Kind,
74867486
case Instruction::FCmp:
74877487
case Instruction::GetElementPtr:
74887488
return false;
7489+
case Instruction::AddrSpaceCast:
7490+
return true;
74897491
default: {
74907492
const auto *CE = dyn_cast<ConstantExpr>(Op);
74917493
if (isa<CastInst>(Op) || (CE && CE->isCast()))

llvm/test/Transforms/Attributor/reduced/aapointer_info_map_invalidation.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ define amdgpu_kernel void @__omp_offloading_fd00_2c00523__ZN11qmcplusplus7ompBLA
88
; CHECK-NEXT: [[TMP1:%.*]] = alloca [0 x [0 x float]], i32 0, align 8, addrspace(5)
99
; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
1010
; CHECK-NEXT: store ptr [[TMP2]], ptr addrspace(5) [[TMP1]], align 8
11-
; CHECK-NEXT: [[TMP3:%.*]] = call fastcc i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr nofree noundef readonly align 8 captures(none) dereferenceable_or_null(8) [[TMP2]], i1 noundef false)
11+
; CHECK-NEXT: [[TMP3:%.*]] = call fastcc i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr nofree readonly align 8 captures(none) dereferenceable_or_null(8) [[TMP2]], i1 noundef false)
1212
; CHECK-NEXT: ret void
1313
;
1414
%1 = alloca [0 x [0 x float]], i32 0, align 8, addrspace(5)
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=instcombine %s | FileCheck %s
3+
4+
; Check that `select B, true, C` isn't optimized to `or B, C`,
5+
; because the invalid addrspacecast %asc.shared introduces poison.
6+
define i1 @not_fold_select(ptr addrspace(1) noundef %x) {
7+
; CHECK-LABEL: define i1 @not_fold_select(
8+
; CHECK-SAME: ptr addrspace(1) noundef [[X:%.*]]) {
9+
; CHECK-NEXT: [[ENTRY:.*:]]
10+
; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[X]] to ptr
11+
; CHECK-NEXT: [[TMP1:%.*]] = tail call i1 @llvm.amdgcn.is.shared(ptr [[TMP0]])
12+
; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(1) [[X]] to ptr addrspace(3)
13+
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq ptr addrspace(3) [[TMP2]], null
14+
; CHECK-NEXT: [[NOT_IS_SHARED:%.*]] = xor i1 [[TMP1]], true
15+
; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[NOT_IS_SHARED]], i1 true, i1 [[TMP3]]
16+
; CHECK-NEXT: ret i1 [[TMP4]]
17+
;
18+
entry:
19+
%asc.flat = addrspacecast ptr addrspace(1) %x to ptr
20+
%is.shared = tail call i1 @llvm.amdgcn.is.shared(ptr %asc.flat)
21+
%asc.shared = addrspacecast ptr %asc.flat to ptr addrspace(3)
22+
%shared.addr = select i1 %is.shared, ptr addrspace(3) %asc.shared, ptr addrspace(3) null
23+
%result = icmp eq ptr addrspace(3) %shared.addr, null
24+
ret i1 %result
25+
}

0 commit comments

Comments
 (0)