Skip to content

Commit 543f948

Browse files
authored
[AMDGPU] Preserve exact flag for lshr (#146744)
When reducing a 64-bit lshr to a 32-bit one, preserve the exact flag. Alive2 verification: https://alive2.llvm.org/ce/z/LcnX7V --------- Signed-off-by: John Lu <John.Lu@amd.com>
1 parent d3d8ef7 commit 543f948

File tree

2 files changed

+3
-2
lines changed

2 files changed

+3
-2
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4328,7 +4328,8 @@ SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
43284328
Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, LHSSL, TargetType, SplitLHS, One);
43294329
}
43304330

4331-
SDValue NewShift = DAG.getNode(ISD::SRL, SL, TargetType, Hi, ShiftAmt);
4331+
SDValue NewShift =
4332+
DAG.getNode(ISD::SRL, SL, TargetType, Hi, ShiftAmt, N->getFlags());
43324333

43334334
SDValue Vec;
43344335
if (VT.isVector()) {

llvm/test/CodeGen/AMDGPU/srl64_reduce_flags.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ define i64 @srl_exact(i64 %arg0, i64 %shift_amt) {
3030
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[DEF3]]
3131
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, killed [[COPY4]], %subreg.sub1
3232
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
33-
; CHECK-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 killed [[COPY5]], killed [[COPY3]], implicit $exec
33+
; CHECK-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = exact V_LSHRREV_B32_e64 killed [[COPY5]], killed [[COPY3]], implicit $exec
3434
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
3535
; CHECK-NEXT: $vgpr0 = COPY [[V_LSHRREV_B32_e64_]]
3636
; CHECK-NEXT: $vgpr1 = COPY [[V_MOV_B32_e32_]]

0 commit comments

Comments
 (0)