Skip to content

Commit 66bd7eb

Browse files
[SVE] Use DUPM to handle more splat immediate cases.
NOTE: Only considers i64-based vectors at this time because smaller element types require extra isel operand parsing. Differential Revision: https://reviews.llvm.org/D118040
1 parent 5da7c04 commit 66bd7eb

File tree

6 files changed

+31
-30
lines changed

6 files changed

+31
-30
lines changed

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1708,6 +1708,9 @@ multiclass sve_int_dup_mask_imm<string asm> {
17081708
(!cast<Instruction>(NAME) ZPR32:$Zd, sve_preferred_logical_imm32:$imm), 6>;
17091709
def : InstAlias<"mov $Zd, $imm",
17101710
(!cast<Instruction>(NAME) ZPR64:$Zd, sve_preferred_logical_imm64:$imm), 5>;
1711+
1712+
def : Pat<(nxv2i64 (AArch64dup (i64 logical_imm64:$imm))),
1713+
(!cast<Instruction>(NAME) logical_imm64:$imm)>;
17111714
}
17121715

17131716
//===----------------------------------------------------------------------===//

llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -133,9 +133,8 @@ define <vscale x 2 x i64> @smax_i64_neg(<vscale x 2 x i64> %a) {
133133
define <vscale x 2 x i64> @smax_i64_out_of_range(<vscale x 2 x i64> %a) {
134134
; CHECK-LABEL: smax_i64_out_of_range:
135135
; CHECK: // %bb.0:
136-
; CHECK-NEXT: mov w8, #65535
136+
; CHECK-NEXT: mov z1.d, #65535 // =0xffff
137137
; CHECK-NEXT: ptrue p0.d
138-
; CHECK-NEXT: mov z1.d, x8
139138
; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d
140139
; CHECK-NEXT: ret
141140
%elt = insertelement <vscale x 2 x i64> undef, i64 65535, i32 0
@@ -277,9 +276,8 @@ define <vscale x 2 x i64> @smin_i64_neg(<vscale x 2 x i64> %a) {
277276
define <vscale x 2 x i64> @smin_i64_out_of_range(<vscale x 2 x i64> %a) {
278277
; CHECK-LABEL: smin_i64_out_of_range:
279278
; CHECK: // %bb.0:
280-
; CHECK-NEXT: mov w8, #65535
279+
; CHECK-NEXT: mov z1.d, #65535 // =0xffff
281280
; CHECK-NEXT: ptrue p0.d
282-
; CHECK-NEXT: mov z1.d, x8
283281
; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d
284282
; CHECK-NEXT: ret
285283
%elt = insertelement <vscale x 2 x i64> undef, i64 65535, i32 0
@@ -385,9 +383,8 @@ define <vscale x 2 x i64> @umax_i64_pos(<vscale x 2 x i64> %a) {
385383
define <vscale x 2 x i64> @umax_i64_out_of_range(<vscale x 2 x i64> %a) {
386384
; CHECK-LABEL: umax_i64_out_of_range:
387385
; CHECK: // %bb.0:
388-
; CHECK-NEXT: mov w8, #65535
386+
; CHECK-NEXT: mov z1.d, #65535 // =0xffff
389387
; CHECK-NEXT: ptrue p0.d
390-
; CHECK-NEXT: mov z1.d, x8
391388
; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d
392389
; CHECK-NEXT: ret
393390
%elt = insertelement <vscale x 2 x i64> undef, i64 65535, i32 0
@@ -493,9 +490,8 @@ define <vscale x 2 x i64> @umin_i64_pos(<vscale x 2 x i64> %a) {
493490
define <vscale x 2 x i64> @umin_i64_out_of_range(<vscale x 2 x i64> %a) {
494491
; CHECK-LABEL: umin_i64_out_of_range:
495492
; CHECK: // %bb.0:
496-
; CHECK-NEXT: mov w8, #65535
493+
; CHECK-NEXT: mov z1.d, #65535 // =0xffff
497494
; CHECK-NEXT: ptrue p0.d
498-
; CHECK-NEXT: mov z1.d, x8
499495
; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d
500496
; CHECK-NEXT: ret
501497
%elt = insertelement <vscale x 2 x i64> undef, i64 65535, i32 0
@@ -627,9 +623,8 @@ define <vscale x 4 x i32> @mul_i32_range(<vscale x 4 x i32> %a) {
627623
define <vscale x 2 x i64> @mul_i64_range(<vscale x 2 x i64> %a) {
628624
; CHECK-LABEL: mul_i64_range:
629625
; CHECK: // %bb.0:
630-
; CHECK-NEXT: mov w8, #255
626+
; CHECK-NEXT: mov z1.d, #255 // =0xff
631627
; CHECK-NEXT: ptrue p0.d
632-
; CHECK-NEXT: mov z1.d, x8
633628
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
634629
; CHECK-NEXT: ret
635630
%elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0

llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -514,9 +514,8 @@ define <vscale x 2 x i64> @smax_i64(<vscale x 2 x i64> %a) {
514514
define <vscale x 2 x i64> @smax_i64_out_of_range(<vscale x 2 x i64> %a) {
515515
; CHECK-LABEL: smax_i64_out_of_range:
516516
; CHECK: // %bb.0:
517-
; CHECK-NEXT: mov w8, #65535
518517
; CHECK-NEXT: ptrue p0.d
519-
; CHECK-NEXT: mov z1.d, x8
518+
; CHECK-NEXT: mov z1.d, #65535 // =0xffff
520519
; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d
521520
; CHECK-NEXT: ret
522521
%pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -832,9 +831,8 @@ define <vscale x 2 x i64> @umax_i64(<vscale x 2 x i64> %a) {
832831
define <vscale x 2 x i64> @umax_i64_out_of_range(<vscale x 2 x i64> %a) {
833832
; CHECK-LABEL: umax_i64_out_of_range:
834833
; CHECK: // %bb.0:
835-
; CHECK-NEXT: mov w8, #65535
836834
; CHECK-NEXT: ptrue p0.d
837-
; CHECK-NEXT: mov z1.d, x8
835+
; CHECK-NEXT: mov z1.d, #65535 // =0xffff
838836
; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d
839837
; CHECK-NEXT: ret
840838
%pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -991,9 +989,8 @@ define <vscale x 2 x i64> @umin_i64(<vscale x 2 x i64> %a) {
991989
define <vscale x 2 x i64> @umin_i64_out_of_range(<vscale x 2 x i64> %a) {
992990
; CHECK-LABEL: umin_i64_out_of_range:
993991
; CHECK: // %bb.0:
994-
; CHECK-NEXT: mov w8, #65535
995992
; CHECK-NEXT: ptrue p0.d
996-
; CHECK-NEXT: mov z1.d, x8
993+
; CHECK-NEXT: mov z1.d, #65535 // =0xffff
997994
; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d
998995
; CHECK-NEXT: ret
999996
%pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)

llvm/test/CodeGen/AArch64/sve-vector-splat.ll

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,8 @@ define <vscale x 4 x i32> @sve_splat_4xi32_imm() {
7373
ret <vscale x 4 x i32> %splat
7474
}
7575

76-
define <vscale x 2 x i64> @sve_splat_2xi64_imm() {
77-
; CHECK-LABEL: sve_splat_2xi64_imm:
76+
define <vscale x 2 x i64> @sve_splat_2xi64_dup_imm() {
77+
; CHECK-LABEL: sve_splat_2xi64_dup_imm:
7878
; CHECK: // %bb.0:
7979
; CHECK-NEXT: mov z0.d, #1 // =0x1
8080
; CHECK-NEXT: ret
@@ -83,6 +83,16 @@ define <vscale x 2 x i64> @sve_splat_2xi64_imm() {
8383
ret <vscale x 2 x i64> %splat
8484
}
8585

86+
define <vscale x 2 x i64> @sve_splat_2xi64_dupm_imm() {
87+
; CHECK-LABEL: sve_splat_2xi64_dupm_imm:
88+
; CHECK: // %bb.0:
89+
; CHECK-NEXT: mov z0.d, #0xffff00000000
90+
; CHECK-NEXT: ret
91+
%ins = insertelement <vscale x 2 x i64> undef, i64 281470681743360, i32 0 ; 0xffff00000000
92+
%splat = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
93+
ret <vscale x 2 x i64> %splat
94+
}
95+
8696
;; Promote splats of smaller illegal integer vector types
8797

8898
define <vscale x 2 x i8> @sve_splat_2xi8(i8 %val) {
@@ -173,8 +183,7 @@ define <vscale x 2 x i32> @sve_splat_2xi32(i32 %val) {
173183
define <vscale x 2 x i32> @sve_splat_2xi32_imm() {
174184
; CHECK-LABEL: sve_splat_2xi32_imm:
175185
; CHECK: // %bb.0:
176-
; CHECK-NEXT: mov w8, #-1
177-
; CHECK-NEXT: mov z0.d, x8
186+
; CHECK-NEXT: mov z0.d, #0xffffffff
178187
; CHECK-NEXT: ret
179188
%ins = insertelement <vscale x 2 x i32> undef, i32 -1, i32 0
180189
%splat = shufflevector <vscale x 2 x i32> %ins, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
@@ -530,9 +539,9 @@ define <vscale x 4 x float> @splat_nxv4f32_imm_out_of_range() {
530539
define <vscale x 2 x double> @splat_nxv2f64_imm_out_of_range() {
531540
; CHECK-LABEL: splat_nxv2f64_imm_out_of_range:
532541
; CHECK: // %bb.0:
533-
; CHECK-NEXT: adrp x8, .LCPI50_0
542+
; CHECK-NEXT: adrp x8, .LCPI51_0
534543
; CHECK-NEXT: ptrue p0.d
535-
; CHECK-NEXT: add x8, x8, :lo12:.LCPI50_0
544+
; CHECK-NEXT: add x8, x8, :lo12:.LCPI51_0
536545
; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8]
537546
; CHECK-NEXT: ret
538547
%1 = insertelement <vscale x 2 x double> undef, double 3.33, i32 0

llvm/test/CodeGen/AArch64/sve-vselect-imm.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -144,10 +144,9 @@ ret <vscale x 4 x i32> %sel
144144
define <vscale x 2 x i64> @sel_64_illegal_wrong_extension(<vscale x 2 x i1> %p) {
145145
; CHECK-LABEL: sel_64_illegal_wrong_extension:
146146
; CHECK: // %bb.0:
147-
; CHECK-NEXT: mov w8, #128
148-
; CHECK-NEXT: mov z1.d, #0 // =0x0
149-
; CHECK-NEXT: mov z0.d, x8
150-
; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
147+
; CHECK-NEXT: mov z0.d, #0 // =0x0
148+
; CHECK-NEXT: mov z1.d, #128 // =0x80
149+
; CHECK-NEXT: mov z0.d, p0/m, z1.d
151150
; CHECK-NEXT: ret
152151
%vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 128, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
153152
%sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %vec, <vscale x 2 x i64> zeroinitializer
@@ -370,8 +369,7 @@ ret <vscale x 4 x i32> %sel
370369
define <vscale x 2 x i64> @sel_merge_64_illegal_wrong_extension(<vscale x 2 x i1> %p, <vscale x 2 x i64> %in) {
371370
; CHECK-LABEL: sel_merge_64_illegal_wrong_extension:
372371
; CHECK: // %bb.0:
373-
; CHECK-NEXT: mov w8, #128
374-
; CHECK-NEXT: mov z1.d, x8
372+
; CHECK-NEXT: mov z1.d, #128 // =0x80
375373
; CHECK-NEXT: mov z0.d, p0/m, z1.d
376374
; CHECK-NEXT: ret
377375
%vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 128, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer

llvm/test/CodeGen/AArch64/sve2-int-mul.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,7 @@ define <vscale x 4 x i32> @mul_i32_imm_neg(<vscale x 4 x i32> %a) {
5959
define <vscale x 2 x i64> @mul_i64_imm(<vscale x 2 x i64> %a) {
6060
; CHECK-LABEL: mul_i64_imm:
6161
; CHECK: // %bb.0:
62-
; CHECK-NEXT: mov w8, #255
63-
; CHECK-NEXT: mov z1.d, x8
62+
; CHECK-NEXT: mov z1.d, #255 // =0xff
6463
; CHECK-NEXT: mul z0.d, z0.d, z1.d
6564
; CHECK-NEXT: ret
6665
%elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0

0 commit comments

Comments
 (0)