Skip to content

Commit 8aa5b1a

Browse files
committed
Optimize extracting i8/i16 elements from the high 128 bits of a 256-bit LASX vector
1 parent b028fc3 commit 8aa5b1a

File tree

4 files changed

+23
-50
lines changed

4 files changed

+23
-50
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2525,12 +2525,9 @@ LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
25252525
SelectionDAG &DAG) const {
25262526
EVT VecTy = Op->getOperand(0)->getValueType(0);
25272527
SDValue Idx = Op->getOperand(1);
2528-
EVT EltTy = VecTy.getVectorElementType();
25292528
unsigned NumElts = VecTy.getVectorNumElements();
25302529

2531-
if (isa<ConstantSDNode>(Idx) &&
2532-
(EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
2533-
EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2))
2530+
if (isa<ConstantSDNode>(Idx) && Idx->getAsZExtVal() < NumElts)
25342531
return Op;
25352532

25362533
return SDValue();

llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1794,6 +1794,18 @@ foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in {
17941794
}
17951795

17961796
// Vector extraction with constant index.
1797+
foreach imm = 16...31 in {
1798+
defvar Imm = !and(imm, 15);
1799+
def : Pat<(i64 (vector_extract v32i8:$xj, imm)),
1800+
(VPICKVE2GR_B (EXTRACT_SUBREG (XVPERMI_D v32i8:$xj, 14), sub_128),
1801+
Imm)>;
1802+
}
1803+
foreach imm = 8...15 in {
1804+
defvar Imm = !and(imm, 7);
1805+
def : Pat<(i64 (vector_extract v16i16:$xj, imm)),
1806+
(VPICKVE2GR_H (EXTRACT_SUBREG (XVPERMI_D v16i16:$xj, 14), sub_128),
1807+
Imm)>;
1808+
}
17971809
def : Pat<(i64 (vector_extract v32i8:$xj, uimm4:$imm)),
17981810
(VPICKVE2GR_B (EXTRACT_SUBREG v32i8:$xj, sub_128), uimm4:$imm)>;
17991811
def : Pat<(i64 (vector_extract v16i16:$xj, uimm3:$imm)),

llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-element.ll

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,9 @@
44
define <32 x i8> @insert_extract_v32i8(<32 x i8> %a) nounwind {
55
; CHECK-LABEL: insert_extract_v32i8:
66
; CHECK: # %bb.0: # %entry
7-
; CHECK-NEXT: addi.d $sp, $sp, -64
8-
; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
9-
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
10-
; CHECK-NEXT: addi.d $fp, $sp, 64
11-
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
12-
; CHECK-NEXT: xvst $xr0, $sp, 0
13-
; CHECK-NEXT: ld.b $a0, $sp, 31
7+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
8+
; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 15
149
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 1
15-
; CHECK-NEXT: addi.d $sp, $fp, -64
16-
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
17-
; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
18-
; CHECK-NEXT: addi.d $sp, $sp, 64
1910
; CHECK-NEXT: ret
2011
entry:
2112
%b = extractelement <32 x i8> %a, i32 31
@@ -26,18 +17,9 @@ entry:
2617
define <16 x i16> @insert_extract_v16i16(<16 x i16> %a) nounwind {
2718
; CHECK-LABEL: insert_extract_v16i16:
2819
; CHECK: # %bb.0: # %entry
29-
; CHECK-NEXT: addi.d $sp, $sp, -64
30-
; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
31-
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
32-
; CHECK-NEXT: addi.d $fp, $sp, 64
33-
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
34-
; CHECK-NEXT: xvst $xr0, $sp, 0
35-
; CHECK-NEXT: ld.h $a0, $sp, 30
20+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
21+
; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 7
3622
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 1
37-
; CHECK-NEXT: addi.d $sp, $fp, -64
38-
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
39-
; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
40-
; CHECK-NEXT: addi.d $sp, $sp, 64
4123
; CHECK-NEXT: ret
4224
entry:
4325
%b = extractelement <16 x i16> %a, i32 15

llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-pair-elements.ll

Lines changed: 6 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4,23 +4,14 @@
44
define <32 x i8> @insert_extract_v32i8(<32 x i8> %a) nounwind {
55
; CHECK-LABEL: insert_extract_v32i8:
66
; CHECK: # %bb.0: # %entry
7-
; CHECK-NEXT: addi.d $sp, $sp, -64
8-
; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
9-
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
10-
; CHECK-NEXT: addi.d $fp, $sp, 64
11-
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
127
; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
13-
; CHECK-NEXT: xvst $xr0, $sp, 0
14-
; CHECK-NEXT: ld.b $a1, $sp, 31
8+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
159
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 1
10+
; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 15
1611
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
1712
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
18-
; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 1
13+
; CHECK-NEXT: vinsgr2vr.b $vr1, $a0, 1
1914
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
20-
; CHECK-NEXT: addi.d $sp, $fp, -64
21-
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
22-
; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
23-
; CHECK-NEXT: addi.d $sp, $sp, 64
2415
; CHECK-NEXT: ret
2516
entry:
2617
%b_lo = extractelement <32 x i8> %a, i32 15
@@ -33,23 +24,14 @@ entry:
3324
define <16 x i16> @insert_extract_v16i16(<16 x i16> %a) nounwind {
3425
; CHECK-LABEL: insert_extract_v16i16:
3526
; CHECK: # %bb.0: # %entry
36-
; CHECK-NEXT: addi.d $sp, $sp, -64
37-
; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
38-
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
39-
; CHECK-NEXT: addi.d $fp, $sp, 64
40-
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
4127
; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7
42-
; CHECK-NEXT: xvst $xr0, $sp, 0
43-
; CHECK-NEXT: ld.h $a1, $sp, 30
28+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
4429
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 1
30+
; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 7
4531
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
4632
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
47-
; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 1
33+
; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 1
4834
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
49-
; CHECK-NEXT: addi.d $sp, $fp, -64
50-
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
51-
; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
52-
; CHECK-NEXT: addi.d $sp, $sp, 64
5335
; CHECK-NEXT: ret
5436
entry:
5537
%b_lo = extractelement <16 x i16> %a, i32 7

0 commit comments

Comments
 (0)