Skip to content

Commit dc850fb

Browse files
committed
[AMDGPU] NFC. Assert that mask is full with VOPC DPP
VOPC DPP should not be formed when the row_mask and bank_mask are not both 0xf (full), because the resulting VOP DPP would have different semantics from the MOV DPP followed by the VOP. Existing checks in GCNDPPCombine already cover this case, but for different reasons, so assert the property for future-proofing. Reviewed By: nhaehnle. Differential Revision: https://reviews.llvm.org/D130101
1 parent b32e600 commit dc850fb

File tree

2 files changed

+45
-1
lines changed

2 files changed

+45
-1
lines changed

llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,18 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
202202
LLVM_DEBUG(dbgs() << " failed: no DPP opcode\n");
203203
return nullptr;
204204
}
205+
int OrigOpE32 = AMDGPU::getVOPe32(OrigOp);
206+
// Prior checks cover Mask with VOPC condition, but not on purpose
207+
auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
208+
assert(RowMaskOpnd && RowMaskOpnd->isImm());
209+
auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
210+
assert(BankMaskOpnd && BankMaskOpnd->isImm());
211+
const bool MaskAllLanes =
212+
RowMaskOpnd->getImm() == 0xF && BankMaskOpnd->getImm() == 0xF;
213+
assert(MaskAllLanes ||
214+
!(TII->isVOPC(DPPOp) ||
215+
(TII->isVOP3(DPPOp) && OrigOpE32 != -1 && TII->isVOPC(OrigOpE32))) &&
216+
"VOPC cannot form DPP unless mask is full");
205217

206218
auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI,
207219
OrigMI.getDebugLoc(), TII->get(DPPOp))
@@ -222,7 +234,6 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
222234
// If we shrunk a 64bit vop3b to 32bits, just ignore the sdst
223235
}
224236

225-
int OrigOpE32 = AMDGPU::getVOPe32(OrigOp);
226237
const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old);
227238
if (OldIdx != -1) {
228239
assert(OldIdx == NumOperands);

llvm/test/CodeGen/AMDGPU/vopc_dpp.mir

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,3 +67,36 @@ body: |
6767
V_CMP_LT_I32_e32 %0, %18, implicit-def $vcc, implicit $exec
6868
6969
...
70+
---
71+
72+
name: mask_not_full
73+
tracksRegLiveness: true
74+
body: |
75+
bb.0:
76+
liveins: $vgpr0, $vgpr1, $vgpr2
77+
78+
; GCN-LABEL: name: mask_not_full
79+
; GCN: liveins: $vgpr0, $vgpr1, $vgpr2
80+
; GCN-NEXT: {{ $}}
81+
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
82+
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
83+
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
84+
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
85+
; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 14, 1, implicit $exec
86+
; GCN-NEXT: V_CMP_CLASS_F16_e32 [[V_MOV_B32_dpp]], [[COPY]], implicit-def $vcc, implicit $mode, implicit $exec
87+
; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[COPY1]], 1, 13, 15, 1, implicit $exec
88+
; GCN-NEXT: [[V_CMP_GE_F16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_GE_F16_e64 1, [[V_MOV_B32_dpp1]], 0, [[COPY]], 1, implicit $mode, implicit $exec
89+
%0:vgpr_32 = COPY $vgpr0
90+
%1:vgpr_32 = COPY $vgpr1
91+
%2:vgpr_32 = IMPLICIT_DEF
92+
%3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
93+
94+
; Do not combine VOPC when row_mask or bank_mask is not 0xf
95+
; All cases are covered by generic rules for creating DPP instructions
96+
%4:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 15, 14, 1, implicit $exec
97+
V_CMP_CLASS_F16_e32 %4, %0, implicit-def $vcc, implicit $mode, implicit $exec
98+
99+
%5:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 13, 15, 1, implicit $exec
100+
%6:sgpr_32 = V_CMP_GE_F16_e64 1, %5, 0, %0, 1, implicit $mode, implicit $exec
101+
102+
...

0 commit comments

Comments
 (0)