Skip to content

Commit d1e3ab9

Browse files
authored
[AMDGPU] Use v_mov_b64 in codegen on gfx1250 (#148272)
1 parent d0a0a1a commit d1e3ab9

File tree

5 files changed

+168
-63
lines changed

5 files changed

+168
-63
lines changed

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1156,7 +1156,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
11561156

11571157
bool hasMadF16() const;
11581158

1159-
bool hasMovB64() const { return GFX940Insts; }
1159+
bool hasMovB64() const { return GFX940Insts || GFX1250Insts; }
11601160

11611161
bool hasLshlAddU64Inst() const { return HasLshlAddU64Inst; }
11621162

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2214,7 +2214,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
22142214
if (ST.hasMovB64()) {
22152215
MI.setDesc(get(AMDGPU::V_MOV_B64_e32));
22162216
if (SrcOp.isReg() || isInlineConstant(MI, 1) ||
2217-
isUInt<32>(SrcOp.getImm()))
2217+
isUInt<32>(SrcOp.getImm()) || ST.has64BitLiterals())
22182218
break;
22192219
}
22202220
if (SrcOp.isImm()) {

llvm/test/CodeGen/AMDGPU/copy_phys_vgpr64.mir

Lines changed: 103 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -run-pass postrapseudos -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX942 %s
55
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass postrapseudos -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s
66
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass postrapseudos -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s
7+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass postrapseudos %s -o - | FileCheck -check-prefix=GFX1250 %s
78

89
---
910
name: copy_v64_to_v64
@@ -32,6 +33,10 @@ body: |
3233
; GFX10-NEXT: {{ $}}
3334
; GFX10-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr2_vgpr3
3435
; GFX10-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit killed $vgpr2_vgpr3, implicit $exec
36+
; GFX1250-LABEL: name: copy_v64_to_v64
37+
; GFX1250: liveins: $vgpr2_vgpr3
38+
; GFX1250-NEXT: {{ $}}
39+
; GFX1250-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 killed $vgpr2_vgpr3, implicit $exec, implicit $exec
3540
$vgpr0_vgpr1 = COPY killed $vgpr2_vgpr3, implicit $exec
3641
...
3742

@@ -62,6 +67,10 @@ body: |
6267
; GFX10-NEXT: {{ $}}
6368
; GFX10-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr2_sgpr3
6469
; GFX10-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr3, implicit $exec, implicit killed $sgpr2_sgpr3, implicit $exec
70+
; GFX1250-LABEL: name: copy_s64_to_v64
71+
; GFX1250: liveins: $sgpr2_sgpr3
72+
; GFX1250-NEXT: {{ $}}
73+
; GFX1250-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 killed $sgpr2_sgpr3, implicit $exec, implicit $exec
6574
$vgpr0_vgpr1 = COPY killed $sgpr2_sgpr3, implicit $exec
6675
...
6776

@@ -94,6 +103,11 @@ body: |
94103
; GFX10-NEXT: {{ $}}
95104
; GFX10-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $agpr2_agpr3
96105
; GFX10-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit killed $agpr2_agpr3, implicit $exec
106+
; GFX1250-LABEL: name: copy_a64_to_v64
107+
; GFX1250: liveins: $agpr2_agpr3
108+
; GFX1250-NEXT: {{ $}}
109+
; GFX1250-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $agpr2_agpr3
110+
; GFX1250-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit killed $agpr2_agpr3, implicit $exec
97111
$vgpr0_vgpr1 = COPY killed $agpr2_agpr3, implicit $exec
98112
...
99113

@@ -130,6 +144,11 @@ body: |
130144
; GFX10-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit $vgpr2_vgpr3_vgpr4_vgpr5
131145
; GFX10-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr4, implicit $exec, implicit $vgpr2_vgpr3_vgpr4_vgpr5
132146
; GFX10-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr5, implicit $exec, implicit $vgpr2_vgpr3_vgpr4_vgpr5, implicit $exec
147+
; GFX1250-LABEL: name: copy_v128_to_v128_fwd
148+
; GFX1250: liveins: $vgpr2_vgpr3_vgpr4_vgpr5
149+
; GFX1250-NEXT: {{ $}}
150+
; GFX1250-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $vgpr2_vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr2_vgpr3_vgpr4_vgpr5
151+
; GFX1250-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 $vgpr4_vgpr5, implicit $exec, implicit $vgpr2_vgpr3_vgpr4_vgpr5, implicit $exec
133152
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY killed $vgpr2_vgpr3_vgpr4_vgpr5, implicit $exec
134153
...
135154

@@ -166,6 +185,11 @@ body: |
166185
; GFX10-NEXT: $vgpr4 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
167186
; GFX10-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
168187
; GFX10-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec
188+
; GFX1250-LABEL: name: copy_v128_to_v128_back
189+
; GFX1250: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
190+
; GFX1250-NEXT: {{ $}}
191+
; GFX1250-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $vgpr2_vgpr3, implicit $exec, implicit-def $vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3
192+
; GFX1250-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 $vgpr0_vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec
169193
$vgpr2_vgpr3_vgpr4_vgpr5 = COPY killed $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec
170194
...
171195

@@ -202,6 +226,12 @@ body: |
202226
; GFX10-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr4_vgpr5_vgpr6
203227
; GFX10-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr5, implicit $exec, implicit $vgpr4_vgpr5_vgpr6
204228
; GFX10-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr6, implicit $exec, implicit killed $vgpr4_vgpr5_vgpr6, implicit $exec
229+
; GFX1250-LABEL: name: copy_v96_to_v96
230+
; GFX1250: liveins: $vgpr4_vgpr5_vgpr6
231+
; GFX1250-NEXT: {{ $}}
232+
; GFX1250-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr4_vgpr5_vgpr6
233+
; GFX1250-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr5, implicit $exec, implicit $vgpr4_vgpr5_vgpr6
234+
; GFX1250-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr6, implicit $exec, implicit killed $vgpr4_vgpr5_vgpr6, implicit $exec
205235
$vgpr0_vgpr1_vgpr2 = COPY killed $vgpr4_vgpr5_vgpr6, implicit $exec
206236
...
207237

@@ -232,6 +262,10 @@ body: |
232262
; GFX10-NEXT: {{ $}}
233263
; GFX10-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr2_vgpr3
234264
; GFX10-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit killed $vgpr2_vgpr3, implicit $exec
265+
; GFX1250-LABEL: name: copy_v64_to_v64_undef_sub0
266+
; GFX1250: liveins: $vgpr3
267+
; GFX1250-NEXT: {{ $}}
268+
; GFX1250-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 killed $vgpr2_vgpr3, implicit $exec, implicit $exec
235269
$vgpr0_vgpr1 = COPY killed $vgpr2_vgpr3, implicit $exec
236270
...
237271

@@ -262,6 +296,10 @@ body: |
262296
; GFX10-NEXT: {{ $}}
263297
; GFX10-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr2_vgpr3
264298
; GFX10-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit killed $vgpr2_vgpr3, implicit $exec
299+
; GFX1250-LABEL: name: copy_v64_to_v64_undef_sub1
300+
; GFX1250: liveins: $vgpr2
301+
; GFX1250-NEXT: {{ $}}
302+
; GFX1250-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 killed $vgpr2_vgpr3, implicit $exec, implicit $exec
265303
$vgpr0_vgpr1 = COPY killed $vgpr2_vgpr3, implicit $exec
266304
...
267305

@@ -298,6 +336,11 @@ body: |
298336
; GFX10-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr5, implicit $exec, implicit $sgpr4_sgpr5_sgpr6_sgpr7
299337
; GFX10-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr6, implicit $exec, implicit $sgpr4_sgpr5_sgpr6_sgpr7
300338
; GFX10-NEXT: $vgpr3 = V_MOV_B32_e32 $sgpr7, implicit $exec, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7
339+
; GFX1250-LABEL: name: copy_s128_to_v128_killed
340+
; GFX1250: liveins: $sgpr4_sgpr5_sgpr6_sgpr7
341+
; GFX1250-NEXT: {{ $}}
342+
; GFX1250-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr4_sgpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr4_sgpr5_sgpr6_sgpr7
343+
; GFX1250-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 $sgpr6_sgpr7, implicit $exec, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7
301344
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY killed $sgpr4_sgpr5_sgpr6_sgpr7
302345
...
303346

@@ -330,6 +373,11 @@ body: |
330373
; GFX10-NEXT: {{ $}}
331374
; GFX10-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $vgpr2_vgpr3
332375
; GFX10-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit $vgpr2_vgpr3, implicit $exec
376+
; GFX1250-LABEL: name: copy_v64_to_v64_unaligned
377+
; GFX1250: liveins: $vgpr2_vgpr3
378+
; GFX1250-NEXT: {{ $}}
379+
; GFX1250-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $vgpr2_vgpr3
380+
; GFX1250-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit $vgpr2_vgpr3, implicit $exec
333381
$vgpr1_vgpr2 = COPY killed $vgpr2_vgpr3, implicit $exec
334382
...
335383

@@ -362,6 +410,11 @@ body: |
362410
; GFX10-NEXT: {{ $}}
363411
; GFX10-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr3_vgpr4
364412
; GFX10-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr4, implicit $exec, implicit killed $vgpr3_vgpr4, implicit $exec
413+
; GFX1250-LABEL: name: copy_v64_unaligned_to_v64
414+
; GFX1250: liveins: $vgpr3_vgpr4
415+
; GFX1250-NEXT: {{ $}}
416+
; GFX1250-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr3_vgpr4
417+
; GFX1250-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr4, implicit $exec, implicit killed $vgpr3_vgpr4, implicit $exec
365418
$vgpr0_vgpr1 = COPY killed $vgpr3_vgpr4, implicit $exec
366419
...
367420

@@ -402,6 +455,13 @@ body: |
402455
; GFX10-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11
403456
; GFX10-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11
404457
; GFX10-NEXT: $vgpr4 = V_MOV_B32_e32 $vgpr11, implicit $exec, implicit killed $vgpr8_vgpr9_vgpr10_vgpr11, implicit $exec
458+
; GFX1250-LABEL: name: copy_v128_to_v128_unaligned
459+
; GFX1250: liveins: $vgpr8_vgpr9_vgpr10_vgpr11
460+
; GFX1250-NEXT: {{ $}}
461+
; GFX1250-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr8_vgpr9_vgpr10_vgpr11
462+
; GFX1250-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11
463+
; GFX1250-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11
464+
; GFX1250-NEXT: $vgpr4 = V_MOV_B32_e32 $vgpr11, implicit $exec, implicit killed $vgpr8_vgpr9_vgpr10_vgpr11, implicit $exec
405465
$vgpr1_vgpr2_vgpr3_vgpr4 = COPY killed $vgpr8_vgpr9_vgpr10_vgpr11, implicit $exec
406466
...
407467

@@ -442,6 +502,13 @@ body: |
442502
; GFX10-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit $vgpr7_vgpr8_vgpr9_vgpr10
443503
; GFX10-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr7_vgpr8_vgpr9_vgpr10
444504
; GFX10-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit killed $vgpr7_vgpr8_vgpr9_vgpr10, implicit $exec
505+
; GFX1250-LABEL: name: copy_v128_unaligned_to_v128
506+
; GFX1250: liveins: $vgpr7_vgpr8_vgpr9_vgpr10
507+
; GFX1250-NEXT: {{ $}}
508+
; GFX1250-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr7_vgpr8_vgpr9_vgpr10
509+
; GFX1250-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit $vgpr7_vgpr8_vgpr9_vgpr10
510+
; GFX1250-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr7_vgpr8_vgpr9_vgpr10
511+
; GFX1250-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit killed $vgpr7_vgpr8_vgpr9_vgpr10, implicit $exec
445512
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY killed $vgpr7_vgpr8_vgpr9_vgpr10, implicit $exec
446513
...
447514

@@ -474,6 +541,11 @@ body: |
474541
; GFX10-NEXT: {{ $}}
475542
; GFX10-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr8_sgpr9
476543
; GFX10-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit killed $sgpr8_sgpr9, implicit $exec
544+
; GFX1250-LABEL: name: copy_s64_to_v64_unaligned
545+
; GFX1250: liveins: $sgpr8_sgpr9
546+
; GFX1250-NEXT: {{ $}}
547+
; GFX1250-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr8_sgpr9
548+
; GFX1250-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit killed $sgpr8_sgpr9, implicit $exec
477549
$vgpr1_vgpr2 = COPY killed $sgpr8_sgpr9, implicit $exec
478550
...
479551

@@ -514,6 +586,13 @@ body: |
514586
; GFX10-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11
515587
; GFX10-NEXT: $vgpr3 = V_MOV_B32_e32 $sgpr10, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11
516588
; GFX10-NEXT: $vgpr4 = V_MOV_B32_e32 $sgpr11, implicit $exec, implicit killed $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec
589+
; GFX1250-LABEL: name: copy_s128_to_v128_unaligned
590+
; GFX1250: liveins: $sgpr8_sgpr9_sgpr10_sgpr11
591+
; GFX1250-NEXT: {{ $}}
592+
; GFX1250-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4, implicit $sgpr8_sgpr9_sgpr10_sgpr11
593+
; GFX1250-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11
594+
; GFX1250-NEXT: $vgpr3 = V_MOV_B32_e32 $sgpr10, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11
595+
; GFX1250-NEXT: $vgpr4 = V_MOV_B32_e32 $sgpr11, implicit $exec, implicit killed $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec
517596
$vgpr1_vgpr2_vgpr3_vgpr4 = COPY killed $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec
518597
...
519598

@@ -550,6 +629,12 @@ body: |
550629
; GFX10-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3, implicit $vgpr8_vgpr9_vgpr10
551630
; GFX10-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr8_vgpr9_vgpr10
552631
; GFX10-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit killed $vgpr8_vgpr9_vgpr10, implicit $exec
632+
; GFX1250-LABEL: name: copy_v96_to_v96_unaligned
633+
; GFX1250: liveins: $vgpr8_vgpr9_vgpr10
634+
; GFX1250-NEXT: {{ $}}
635+
; GFX1250-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3, implicit $vgpr8_vgpr9_vgpr10
636+
; GFX1250-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr8_vgpr9_vgpr10
637+
; GFX1250-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit killed $vgpr8_vgpr9_vgpr10, implicit $exec
553638
$vgpr1_vgpr2_vgpr3 = COPY killed $vgpr8_vgpr9_vgpr10, implicit $exec
554639
...
555640

@@ -586,6 +671,12 @@ body: |
586671
; GFX10-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr7_vgpr8_vgpr9
587672
; GFX10-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit $vgpr7_vgpr8_vgpr9
588673
; GFX10-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit killed $vgpr7_vgpr8_vgpr9, implicit $exec
674+
; GFX1250-LABEL: name: copy_v96_unaligned_to_v96
675+
; GFX1250: liveins: $vgpr7_vgpr8_vgpr9
676+
; GFX1250-NEXT: {{ $}}
677+
; GFX1250-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr7_vgpr8_vgpr9
678+
; GFX1250-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit $vgpr7_vgpr8_vgpr9
679+
; GFX1250-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit killed $vgpr7_vgpr8_vgpr9, implicit $exec
589680
$vgpr0_vgpr1_vgpr2 = COPY killed $vgpr7_vgpr8_vgpr9, implicit $exec
590681
...
591682

@@ -622,6 +713,12 @@ body: |
622713
; GFX10-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $sgpr0_sgpr1_sgpr2
623714
; GFX10-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2
624715
; GFX10-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec
716+
; GFX1250-LABEL: name: copy_s96_to_v96
717+
; GFX1250: liveins: $sgpr0_sgpr1_sgpr2
718+
; GFX1250-NEXT: {{ $}}
719+
; GFX1250-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $sgpr0_sgpr1_sgpr2
720+
; GFX1250-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2
721+
; GFX1250-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec
625722
$vgpr0_vgpr1_vgpr2 = COPY killed $sgpr0_sgpr1_sgpr2, implicit $exec
626723
...
627724

@@ -658,5 +755,11 @@ body: |
658755
; GFX10-NEXT: $vgpr3 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3, implicit $sgpr0_sgpr1_sgpr2
659756
; GFX10-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2
660757
; GFX10-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec
758+
; GFX1250-LABEL: name: copy_s96_to_v96_unaligned
759+
; GFX1250: liveins: $sgpr0_sgpr1_sgpr2
760+
; GFX1250-NEXT: {{ $}}
761+
; GFX1250-NEXT: $vgpr3 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3, implicit $sgpr0_sgpr1_sgpr2
762+
; GFX1250-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2
763+
; GFX1250-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec
661764
$vgpr1_vgpr2_vgpr3 = COPY killed $sgpr0_sgpr1_sgpr2, implicit $exec
662765
...

0 commit comments

Comments
 (0)