Commit 0bf5c7b
[InlineSpiller] Do not fold undef copies
Change-Id: I134bc03e7eeb7f5324c1ce7005b96b28d1b01643
1 parent fc9dd58 commit 0bf5c7b

2 files changed (+83, -0)

llvm/lib/CodeGen/InlineSpiller.cpp

Lines changed: 3 additions & 0 deletions
@@ -946,6 +946,9 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
     if (MO.isUse() && !MO.readsReg() && !MO.isTied())
       continue;
 
+    if (MI->isCopy() && MI->getOperand(1).isUndef())
+      continue;
+
     if (MO.isImplicit()) {
       ImpReg = MO.getReg();
       continue;
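
For context, a minimal standalone sketch (not part of the commit) of the condition the new guard expresses: the helper name skipUndefCopyFold below is hypothetical and only restates the three-line change above in isolation.

#include "llvm/CodeGen/MachineInstr.h"

// Hypothetical helper, not part of this commit: restates the new guard.
// A COPY whose source operand (operand 1) carries the undef flag has no
// meaningful value to store, so foldMemoryOperand skips it instead of
// folding it into a spill-slot access.
static bool skipUndefCopyFold(const llvm::MachineInstr &MI) {
  return MI.isCopy() && MI.getOperand(1).isUndef();
}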
New MIR test (file path not shown): Lines changed: 80 additions & 0 deletions
@@ -0,0 +1,80 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs --start-before=greedy,2 --stop-after=greedy,2 %s -o - | FileCheck %s

# Make sure there's no machine verifier error.

# If RA is unable to find a register to allocate, then cleanupFailedVReg will do ad-hoc rewriting and will insert undefs to make the LiveRanges workable.
# %30:av_128 = COPY undef $vgpr0_vgpr1_vgpr2_vgpr3 is an example of such a rewrite / undef. If %30 is spilled, we should not insert
# actual spill code, as the source operand is undef.
# Check that there are no verifier issues with the LiveRange of $vgpr0_vgpr1_vgpr2_vgpr3 and that we do not insert spill code for %30.


--- |
  define void @foo() #0 {
    ret void
  }

  attributes #0 = { "amdgpu-waves-per-eu"="8,8" }

...

---
name: foo
tracksRegLiveness: true
stack:
  - { id: 0, type: spill-slot, size: 32, alignment: 4 }
machineFunctionInfo:
  maxKernArgAlign: 4
  isEntryFunction: true
  waveLimiter: true
  scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
  stackPtrOffsetReg: '$sgpr32'
  frameOffsetReg: '$sgpr33'
  hasSpilledVGPRs: true
  argumentInfo:
    privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
    dispatchPtr: { reg: '$sgpr4_sgpr5' }
    kernargSegmentPtr: { reg: '$sgpr6_sgpr7' }
    workGroupIDX: { reg: '$sgpr8' }
    privateSegmentWaveByteOffset: { reg: '$sgpr9' }
body: |
  bb.0:
    ; CHECK-LABEL: name: foo
    ; CHECK: INLINEASM &"; def $0 $1 $2 $3 $4", 1 /* sideeffect attdialect */, 10 /* regdef */, def %10, 10 /* regdef */, def %1, 10 /* regdef */, def %2, 10 /* regdef */, def $vgpr0_vgpr1_vgpr2_vgpr3, 10 /* regdef */, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:av_128 = COPY undef $vgpr0_vgpr1_vgpr2_vgpr3
    ; CHECK-NEXT: SI_SPILL_AV128_SAVE [[COPY]], %stack.2, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.2, align 4, addrspace 5)
    ; CHECK-NEXT: SI_SPILL_AV160_SAVE %2, %stack.1, $sgpr32, 0, implicit $exec :: (store (s160) into %stack.1, align 4, addrspace 5)
    ; CHECK-NEXT: SI_SPILL_AV256_SAVE %1, %stack.3, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.3, align 4, addrspace 5)
    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_512 = COPY %10
    ; CHECK-NEXT: SI_SPILL_V512_SAVE [[COPY1]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5)
    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_512 = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
    ; CHECK-NEXT: SI_SPILL_V512_SAVE [[COPY2]], %stack.6, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.6, align 4, addrspace 5)
    ; CHECK-NEXT: INLINEASM &"; clobber", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def early-clobber $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
    ; CHECK-NEXT: [[SI_SPILL_V512_RESTORE:%[0-9]+]]:vreg_512 = SI_SPILL_V512_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.6, align 4, addrspace 5)
    ; CHECK-NEXT: $agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16 = COPY [[SI_SPILL_V512_RESTORE]]
    ; CHECK-NEXT: [[SI_SPILL_V512_RESTORE1:%[0-9]+]]:vreg_512 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
    ; CHECK-NEXT: SI_SPILL_V512_SAVE [[SI_SPILL_V512_RESTORE1]], %stack.4, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.4, align 4, addrspace 5)
    ; CHECK-NEXT: [[SI_SPILL_AV256_RESTORE:%[0-9]+]]:vreg_256 = SI_SPILL_AV256_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.3, align 4, addrspace 5)
    ; CHECK-NEXT: SI_SPILL_V256_SAVE [[SI_SPILL_AV256_RESTORE]], %stack.5, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.5, align 4, addrspace 5)
    ; CHECK-NEXT: [[SI_SPILL_AV160_RESTORE:%[0-9]+]]:vreg_160 = SI_SPILL_AV160_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s160) from %stack.1, align 4, addrspace 5)
    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_AV128_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.2, align 4, addrspace 5)
    ; CHECK-NEXT: [[SI_SPILL_AV512_RESTORE:%[0-9]+]]:av_512 = SI_SPILL_AV512_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.4, align 4, addrspace 5)
    ; CHECK-NEXT: [[SI_SPILL_V256_RESTORE:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.5, align 4, addrspace 5)
    ; CHECK-NEXT: INLINEASM &"; use $0 $1 $2 $3 $4", 1 /* sideeffect attdialect */, 9 /* reguse */, [[SI_SPILL_AV512_RESTORE]], 9 /* reguse */, [[SI_SPILL_V256_RESTORE]], 9 /* reguse */, [[SI_SPILL_AV160_RESTORE]], 9 /* reguse */, undef $vgpr0_vgpr1_vgpr2_vgpr3, 9 /* reguse */, $agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16
    ; CHECK-NEXT: SI_RETURN
    INLINEASM &"; def $0 $1 $2 $3 $4", 1 /* sideeffect attdialect */, 10, def %22:vreg_512, 10, def %25:vreg_256, 10, def %28:vreg_160, 10, def $vgpr0_vgpr1_vgpr2_vgpr3, 10, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
    %30:av_128 = COPY undef $vgpr0_vgpr1_vgpr2_vgpr3
    %27:av_160 = COPY %28:vreg_160
    %24:av_256 = COPY %25:vreg_256
    SI_SPILL_V512_SAVE %22:vreg_512, %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5)
    %18:vreg_512 = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
    INLINEASM &"; clobber", 1 /* sideeffect attdialect */, 10, implicit-def early-clobber $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
    $agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16 = COPY %18:vreg_512
    %23:vreg_512 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
    %26:vreg_256 = COPY %24:av_256
    %29:vreg_160 = COPY %27:av_160
    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %30:av_128
    INLINEASM &"; use $0 $1 $2 $3 $4", 1 /* sideeffect attdialect */, 9, %23:vreg_512, 9, %26:vreg_256, 9, %29:vreg_160, 9, undef $vgpr0_vgpr1_vgpr2_vgpr3, 9, $agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16
    SI_RETURN

...
