Skip to content

Commit 00a0512

Browse files
committed
optimize extracting two elements when lasx supported
1 parent 8aa5b1a commit 00a0512

File tree

2 files changed

+64
-32
lines changed

2 files changed

+64
-32
lines changed

llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1282,6 +1282,32 @@ multiclass PatCCXrXrF<CondCode CC, string Inst> {
12821282
(!cast<LAInst>(Inst#"_D") LASX256:$xj, LASX256:$xk)>;
12831283
}
12841284

1285+
multiclass PairInsertExtractPatV8<ValueType vecty, ValueType elemty> {
1286+
foreach imm1 = 0...3 in {
1287+
foreach imm2 = 0...3 in {
1288+
defvar Imm = !or(!shl(imm2, 4), imm1);
1289+
def : Pat<(vector_insert (vector_insert vecty:$xd,
1290+
(elemty (vector_extract vecty:$xj, imm1)), imm2),
1291+
(elemty (vector_extract vecty:$xj, !add(imm1, 4))),
1292+
!add(imm2, 4)),
1293+
(XVEXTRINS_W $xd, $xj, Imm)>;
1294+
}
1295+
}
1296+
}
1297+
1298+
multiclass PairInsertExtractPatV4<ValueType vecty, ValueType elemty> {
1299+
foreach imm1 = 0...1 in {
1300+
foreach imm2 = 0...1 in {
1301+
defvar Imm = !or(!shl(imm2, 4), imm1);
1302+
def : Pat<(vector_insert (vector_insert vecty:$xd,
1303+
(elemty (vector_extract vecty:$xj, imm1)), imm2),
1304+
(elemty (vector_extract vecty:$xj, !add(imm1, 2))),
1305+
!add(imm2, 2)),
1306+
(XVEXTRINS_D $xd, $xj, Imm)>;
1307+
}
1308+
}
1309+
}
1310+
12851311
let Predicates = [HasExtLASX] in {
12861312

12871313
// XVADD_{B/H/W/D}
@@ -1582,6 +1608,38 @@ defm : PatCCXrXrF<SETUNE, "XVFCMP_CUNE">;
15821608
defm : PatCCXrXrF<SETO, "XVFCMP_COR">;
15831609
defm : PatCCXrXrF<SETUO, "XVFCMP_CUN">;
15841610

1611+
// Insert two elements extracted from one vector into another vector. (The
1612+
// two elements must sit at the same position within the low and the high
1613+
// 128-bit halves, in the source vector and in the destination vector.)
1614+
// 2*XVPICKVE2GR_{W/D} + 2*XVINSGR2VR_{W/D} -> XVEXTRINS_{W/D}
1615+
// XVPERMI_D + 2*XVPICKVE2GR_{B/H} + 2*PseudoXVINSGR2VR_{B/H} -> XVEXTRINS_{B/H}
1616+
foreach imm1 = 0...15 in {
1617+
foreach imm2 = 0...15 in {
1618+
defvar Imm = !or(!shl(imm2, 4), imm1);
1619+
def : Pat<(vector_insert (vector_insert v32i8:$xd,
1620+
(GRLenVT (vector_extract v32i8:$xj, imm1)), imm2),
1621+
(GRLenVT (vector_extract v32i8:$xj, !add(imm1, 16))),
1622+
!add(imm2, 16)),
1623+
(XVEXTRINS_B $xd, $xj, Imm)>;
1624+
}
1625+
}
1626+
1627+
foreach imm1 = 0...7 in {
1628+
foreach imm2 = 0...7 in {
1629+
defvar Imm = !or(!shl(imm2, 4), imm1);
1630+
def : Pat<(vector_insert (vector_insert v16i16:$xd,
1631+
(GRLenVT (vector_extract v16i16:$xj, imm1)), imm2),
1632+
(GRLenVT (vector_extract v16i16:$xj, !add(imm1, 8))),
1633+
!add(imm2, 8)),
1634+
(XVEXTRINS_H $xd, $xj, Imm)>;
1635+
}
1636+
}
1637+
1638+
defm : PairInsertExtractPatV8<v8i32, GRLenVT>;
1639+
defm : PairInsertExtractPatV8<v8f32, f32>;
1640+
defm : PairInsertExtractPatV4<v4i64, GRLenVT>;
1641+
defm : PairInsertExtractPatV4<v4f64, f64>;
1642+
15851643
// PseudoXVINSGR2VR_{B/H}
15861644
def : Pat<(vector_insert v32i8:$xd, GRLenVT:$rj, uimm5:$imm),
15871645
(PseudoXVINSGR2VR_B v32i8:$xd, GRLenVT:$rj, uimm5:$imm)>;

llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-pair-elements.ll

Lines changed: 6 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,7 @@
44
define <32 x i8> @insert_extract_v32i8(<32 x i8> %a) nounwind {
55
; CHECK-LABEL: insert_extract_v32i8:
66
; CHECK: # %bb.0: # %entry
7-
; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
8-
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
9-
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 1
10-
; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 15
11-
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
12-
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
13-
; CHECK-NEXT: vinsgr2vr.b $vr1, $a0, 1
14-
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
7+
; CHECK-NEXT: xvextrins.b $xr0, $xr0, 31
158
; CHECK-NEXT: ret
169
entry:
1710
%b_lo = extractelement <32 x i8> %a, i32 15
@@ -24,14 +17,7 @@ entry:
2417
define <16 x i16> @insert_extract_v16i16(<16 x i16> %a) nounwind {
2518
; CHECK-LABEL: insert_extract_v16i16:
2619
; CHECK: # %bb.0: # %entry
27-
; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7
28-
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
29-
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 1
30-
; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 7
31-
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
32-
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
33-
; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 1
34-
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
20+
; CHECK-NEXT: xvextrins.h $xr0, $xr0, 23
3521
; CHECK-NEXT: ret
3622
entry:
3723
%b_lo = extractelement <16 x i16> %a, i32 7
@@ -44,10 +30,7 @@ entry:
4430
define <8 x i32> @insert_extract_v8i32(<8 x i32> %a) nounwind {
4531
; CHECK-LABEL: insert_extract_v8i32:
4632
; CHECK: # %bb.0: # %entry
47-
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 3
48-
; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 7
49-
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 1
50-
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 5
33+
; CHECK-NEXT: xvextrins.w $xr0, $xr0, 19
5134
; CHECK-NEXT: ret
5235
entry:
5336
%b_lo = extractelement <8 x i32> %a, i32 3
@@ -60,10 +43,7 @@ entry:
6043
define <8 x float> @insert_extract_v8f32(<8 x float> %a) nounwind {
6144
; CHECK-LABEL: insert_extract_v8f32:
6245
; CHECK: # %bb.0: # %entry
63-
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 3
64-
; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 7
65-
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 1
66-
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 5
46+
; CHECK-NEXT: xvextrins.w $xr0, $xr0, 19
6747
; CHECK-NEXT: ret
6848
entry:
6949
%b_lo = extractelement <8 x float> %a, i32 3
@@ -76,10 +56,7 @@ entry:
7656
define <4 x i64> @insert_extract_v4i64(<4 x i64> %a) nounwind {
7757
; CHECK-LABEL: insert_extract_v4i64:
7858
; CHECK: # %bb.0: # %entry
79-
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
80-
; CHECK-NEXT: xvpickve2gr.d $a1, $xr0, 3
81-
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 0
82-
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 2
59+
; CHECK-NEXT: xvextrins.d $xr0, $xr0, 1
8360
; CHECK-NEXT: ret
8461
entry:
8562
%b_lo = extractelement <4 x i64> %a, i32 1
@@ -92,10 +69,7 @@ entry:
9269
define <4 x double> @insert_extract_v4f64(<4 x double> %a) nounwind {
9370
; CHECK-LABEL: insert_extract_v4f64:
9471
; CHECK: # %bb.0: # %entry
95-
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
96-
; CHECK-NEXT: xvpickve2gr.d $a1, $xr0, 3
97-
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 0
98-
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 2
72+
; CHECK-NEXT: xvextrins.d $xr0, $xr0, 1
9973
; CHECK-NEXT: ret
10074
entry:
10175
%b_lo = extractelement <4 x double> %a, i32 1

0 commit comments

Comments
 (0)