Skip to content

Commit eb7c947

Browse files
authored
AMDGPU: Correct legal literal operand logic for multiple uses (llvm#127594)
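The same literal can be used multiple times in an instruction, not just once. We were not tracking which literal value was already in use, so every repeated use of the same literal was counted against the literal limit and the operand was incorrectly rejected as illegal. Track the used literal and skip operands identical to it. This helps avoid regressions in a future patch.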
1 parent 93d3e20 commit eb7c947

File tree

4 files changed

+89
-20
lines changed

4 files changed

+89
-20
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5931,11 +5931,17 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
59315931
if (!MO)
59325932
MO = &MI.getOperand(OpIdx);
59335933

5934+
const MachineOperand *UsedLiteral = nullptr;
5935+
59345936
int ConstantBusLimit = ST.getConstantBusLimit(MI.getOpcode());
59355937
int LiteralLimit = !isVOP3(MI) || ST.hasVOP3Literal() ? 1 : 0;
59365938
if (isVALU(MI) && usesConstantBus(MRI, *MO, OpInfo)) {
5937-
if (!MO->isReg() && !isInlineConstant(*MO, OpInfo) && !LiteralLimit--)
5938-
return false;
5939+
if (!MO->isReg() && !isInlineConstant(*MO, OpInfo)) {
5940+
if (!LiteralLimit--)
5941+
return false;
5942+
5943+
UsedLiteral = MO;
5944+
}
59395945

59405946
SmallDenseSet<RegSubRegPair> SGPRsUsed;
59415947
if (MO->isReg())
@@ -5956,6 +5962,12 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
59565962
}
59575963
} else if (AMDGPU::isSISrcOperand(InstDesc, i) &&
59585964
!isInlineConstant(Op, InstDesc.operands()[i])) {
5965+
// The same literal may be used multiple times.
5966+
if (!UsedLiteral)
5967+
UsedLiteral = &Op;
5968+
else if (UsedLiteral->isIdenticalTo(Op))
5969+
continue;
5970+
59595971
if (!LiteralLimit--)
59605972
return false;
59615973
if (--ConstantBusLimit <= 0)

llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2162,8 +2162,7 @@ body: |
21622162
; GFX11-NEXT: $sgpr5 = frame-setup COPY $sgpr34
21632163
; GFX11-NEXT: $sgpr34 = frame-setup COPY $sgpr32
21642164
; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
2165-
; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
2166-
; GFX11-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
2165+
; GFX11-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 $sgpr33, 12352, 0, implicit $exec
21672166
; GFX11-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
21682167
; GFX11-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
21692168
; GFX11-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
@@ -2178,8 +2177,7 @@ body: |
21782177
; GFX12-NEXT: $sgpr5 = frame-setup COPY $sgpr34
21792178
; GFX12-NEXT: $sgpr34 = frame-setup COPY $sgpr32
21802179
; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc
2181-
; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
2182-
; GFX12-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec
2180+
; GFX12-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 $sgpr33, 4160, 0, implicit $exec
21832181
; GFX12-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
21842182
; GFX12-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
21852183
; GFX12-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
@@ -2315,8 +2313,7 @@ body: |
23152313
; GFX11-NEXT: $sgpr5 = frame-setup COPY $sgpr34
23162314
; GFX11-NEXT: $sgpr34 = frame-setup COPY $sgpr32
23172315
; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
2318-
; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
2319-
; GFX11-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
2316+
; GFX11-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 $sgpr33, 12352, 0, implicit $exec
23202317
; GFX11-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
23212318
; GFX11-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
23222319
; GFX11-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
@@ -2332,8 +2329,7 @@ body: |
23322329
; GFX12-NEXT: $sgpr5 = frame-setup COPY $sgpr34
23332330
; GFX12-NEXT: $sgpr34 = frame-setup COPY $sgpr32
23342331
; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc
2335-
; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
2336-
; GFX12-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec
2332+
; GFX12-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 $sgpr33, 4160, 0, implicit $exec
23372333
; GFX12-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
23382334
; GFX12-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
23392335
; GFX12-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
@@ -2469,8 +2465,7 @@ body: |
24692465
; GFX11-NEXT: $sgpr5 = frame-setup COPY $sgpr34
24702466
; GFX11-NEXT: $sgpr34 = frame-setup COPY $sgpr32
24712467
; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
2472-
; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
2473-
; GFX11-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
2468+
; GFX11-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $sgpr33, 12352, 0, implicit $exec
24742469
; GFX11-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
24752470
; GFX11-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
24762471
; GFX11-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
@@ -2485,8 +2480,7 @@ body: |
24852480
; GFX12-NEXT: $sgpr5 = frame-setup COPY $sgpr34
24862481
; GFX12-NEXT: $sgpr34 = frame-setup COPY $sgpr32
24872482
; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc
2488-
; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
2489-
; GFX12-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec
2483+
; GFX12-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $sgpr33, 4160, 0, implicit $exec
24902484
; GFX12-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
24912485
; GFX12-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
24922486
; GFX12-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
@@ -2622,8 +2616,7 @@ body: |
26222616
; GFX11-NEXT: $sgpr5 = frame-setup COPY $sgpr34
26232617
; GFX11-NEXT: $sgpr34 = frame-setup COPY $sgpr32
26242618
; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
2625-
; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
2626-
; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
2619+
; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr33, 12352, 0, implicit $exec
26272620
; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
26282621
; GFX11-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
26292622
; GFX11-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
@@ -2639,8 +2632,7 @@ body: |
26392632
; GFX12-NEXT: $sgpr5 = frame-setup COPY $sgpr34
26402633
; GFX12-NEXT: $sgpr34 = frame-setup COPY $sgpr32
26412634
; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc
2642-
; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
2643-
; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec
2635+
; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr33, 4160, 0, implicit $exec
26442636
; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
26452637
; GFX12-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
26462638
; GFX12-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -run-pass=si-fold-operands -o - %s | FileCheck %s
3+
4+
# The same literal may be used multiple times in different operands,
5+
# as long as it is the same value.
6+
7+
---
8+
name: fold_multiple_same_literal_use_0
9+
tracksRegLiveness: true
10+
body: |
11+
bb.0:
12+
liveins: $vgpr0
13+
14+
; CHECK-LABEL: name: fold_multiple_same_literal_use_0
15+
; CHECK: liveins: $vgpr0
16+
; CHECK-NEXT: {{ $}}
17+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
18+
; CHECK-NEXT: [[V_DIV_SCALE_F32_e64_:%[0-9]+]]:vgpr_32, [[V_DIV_SCALE_F32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_DIV_SCALE_F32_e64 0, 1178657792, 0, 1178657792, 0, 1178657792, 0, 0, implicit $mode, implicit $exec
19+
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_DIV_SCALE_F32_e64_]]
20+
%0:vgpr_32 = COPY $vgpr0
21+
%1:sreg_32 = S_MOV_B32 1178657792
22+
%2:vgpr_32 = COPY %1
23+
%3:vgpr_32, %4:sreg_32_xm0_xexec = V_DIV_SCALE_F32_e64 0, %2, 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
24+
S_ENDPGM 0, implicit %3
25+
...
26+
27+
---
28+
name: fold_multiple_same_literal_use_1
29+
tracksRegLiveness: true
30+
body: |
31+
bb.0:
32+
liveins: $vgpr0
33+
34+
; CHECK-LABEL: name: fold_multiple_same_literal_use_1
35+
; CHECK: liveins: $vgpr0
36+
; CHECK-NEXT: {{ $}}
37+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
38+
; CHECK-NEXT: [[V_DIV_SCALE_F32_e64_:%[0-9]+]]:vgpr_32, [[V_DIV_SCALE_F32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_DIV_SCALE_F32_e64 0, 1178657792, 0, 1178657792, 0, 1178657792, 0, 0, implicit $mode, implicit $exec
39+
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_DIV_SCALE_F32_e64_]]
40+
%0:vgpr_32 = COPY $vgpr0
41+
%1:sreg_32 = S_MOV_B32 1178657792
42+
%2:vgpr_32 = COPY %1
43+
%3:vgpr_32, %4:sreg_32_xm0_xexec = V_DIV_SCALE_F32_e64 0, 1178657792, 0, 1178657792, 0, %2, 0, 0, implicit $mode, implicit $exec
44+
S_ENDPGM 0, implicit %3
45+
...
46+
47+
---
48+
name: no_fold_multiple_same_literal_different_value
49+
tracksRegLiveness: true
50+
body: |
51+
bb.0:
52+
liveins: $vgpr0
53+
54+
; CHECK-LABEL: name: no_fold_multiple_same_literal_different_value
55+
; CHECK: liveins: $vgpr0
56+
; CHECK-NEXT: {{ $}}
57+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
58+
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1178657793, implicit $exec
59+
; CHECK-NEXT: [[V_DIV_SCALE_F32_e64_:%[0-9]+]]:vgpr_32, [[V_DIV_SCALE_F32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_DIV_SCALE_F32_e64 0, 1178657792, 0, 1178657792, 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec
60+
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_DIV_SCALE_F32_e64_]]
61+
%0:vgpr_32 = COPY $vgpr0
62+
%1:sreg_32 = S_MOV_B32 1178657793
63+
%2:vgpr_32 = COPY %1
64+
%3:vgpr_32, %4:sreg_32_xm0_xexec = V_DIV_SCALE_F32_e64 0, 1178657792, 0, 1178657792, 0, %2, 0, 0, implicit $mode, implicit $exec
65+
S_ENDPGM 0, implicit %3
66+
...

llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,7 @@ body: |
5555

5656
# GCN-LABEL: name: fma_sgpr_sgpr_use
5757
# GCN: %0:sgpr_32 = IMPLICIT_DEF
58-
# GCN-NEXT: %2:vgpr_32 = V_MOV_B32_e32 1234567, implicit $exec
59-
# GCN-NEXT: %3:vgpr_32 = V_FMAC_F32_e64 0, %0, 0, 1234567, 0, %2, 0, 0, implicit $mode, implicit $exec
58+
# GCN: %3:vgpr_32 = V_FMA_F32_e64 0, %0, 0, 1234567, 0, 1234567, 0, 0, implicit $mode, implicit $exec
6059
---
6160
name: fma_sgpr_sgpr_use
6261
body: |

0 commit comments

Comments
 (0)