Skip to content

Commit 82c0a53

Browse files
committed
[LoongArch] Pre-commit for optimizing insert extracted pair elements
1 parent 1b7cbe1 commit 82c0a53

File tree

1 file changed

+132
-0
lines changed

1 file changed

+132
-0
lines changed
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
3+
4+
; <32 x i8> case: copies the last byte of each 128-bit half of %a (elements 15
; and 31) into element 1 of the same half (elements 1 and 17).
;
; The CHECK lines below are an autogenerated snapshot of the current LASX
; lowering (see the NOTE at the top of the file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand. In this
; snapshot the low-half byte is moved with vpickve2gr.b/vinsgr2vr.b, while the
; high-half byte is reloaded from a stack copy of the vector (xvst + ld.b at
; offset 31) and inserted through xvpermi.q, so a frame with $fp and an
; adjusted $sp (bstrins.d) is set up.
define <32 x i8> @insert_extract_v32i8(<32 x i8> %a) nounwind {
5+
; CHECK-LABEL: insert_extract_v32i8:
6+
; CHECK: # %bb.0: # %entry
7+
; CHECK-NEXT: addi.d $sp, $sp, -64
8+
; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
9+
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
10+
; CHECK-NEXT: addi.d $fp, $sp, 64
11+
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
12+
; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
13+
; CHECK-NEXT: xvst $xr0, $sp, 0
14+
; CHECK-NEXT: ld.b $a1, $sp, 31
15+
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 1
16+
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
17+
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
18+
; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 1
19+
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
20+
; CHECK-NEXT: addi.d $sp, $fp, -64
21+
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
22+
; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
23+
; CHECK-NEXT: addi.d $sp, $sp, 64
24+
; CHECK-NEXT: ret
25+
entry:
26+
%b_lo = extractelement <32 x i8> %a, i32 15
27+
%b_hi = extractelement <32 x i8> %a, i32 31
28+
%c = insertelement <32 x i8> %a, i8 %b_lo, i32 1
29+
%d = insertelement <32 x i8> %c, i8 %b_hi, i32 17
30+
ret <32 x i8> %d
31+
}
32+
33+
; <16 x i16> case: copies the last halfword of each 128-bit half of %a
; (elements 7 and 15) into element 1 of the same half (elements 1 and 9).
;
; The CHECK lines below are an autogenerated snapshot of the current LASX
; lowering; regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand. In this snapshot the low-half element is moved with
; vpickve2gr.h/vinsgr2vr.h, while the high-half element is reloaded from a
; stack copy of the vector (xvst + ld.h at byte offset 30) and inserted
; through xvpermi.q, so a frame with $fp and an adjusted $sp (bstrins.d) is
; set up.
define <16 x i16> @insert_extract_v16i16(<16 x i16> %a) nounwind {
34+
; CHECK-LABEL: insert_extract_v16i16:
35+
; CHECK: # %bb.0: # %entry
36+
; CHECK-NEXT: addi.d $sp, $sp, -64
37+
; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
38+
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
39+
; CHECK-NEXT: addi.d $fp, $sp, 64
40+
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
41+
; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7
42+
; CHECK-NEXT: xvst $xr0, $sp, 0
43+
; CHECK-NEXT: ld.h $a1, $sp, 30
44+
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 1
45+
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
46+
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
47+
; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 1
48+
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
49+
; CHECK-NEXT: addi.d $sp, $fp, -64
50+
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
51+
; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
52+
; CHECK-NEXT: addi.d $sp, $sp, 64
53+
; CHECK-NEXT: ret
54+
entry:
55+
%b_lo = extractelement <16 x i16> %a, i32 7
56+
%b_hi = extractelement <16 x i16> %a, i32 15
57+
%c = insertelement <16 x i16> %a, i16 %b_lo, i32 1
58+
%d = insertelement <16 x i16> %c, i16 %b_hi, i32 9
59+
ret <16 x i16> %d
60+
}
61+
62+
; <8 x i32> case: copies the last word of each 128-bit half of %a (elements 3
; and 7) into element 1 of the same half (elements 1 and 5).
;
; The CHECK lines below are an autogenerated snapshot of the current LASX
; lowering; regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand. In this snapshot each element is moved through a
; general-purpose register with one xvpickve2gr.w / xvinsgr2vr.w pair, with no
; stack traffic.
define <8 x i32> @insert_extract_v8i32(<8 x i32> %a) nounwind {
63+
; CHECK-LABEL: insert_extract_v8i32:
64+
; CHECK: # %bb.0: # %entry
65+
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 3
66+
; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 7
67+
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 1
68+
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 5
69+
; CHECK-NEXT: ret
70+
entry:
71+
%b_lo = extractelement <8 x i32> %a, i32 3
72+
%b_hi = extractelement <8 x i32> %a, i32 7
73+
%c = insertelement <8 x i32> %a, i32 %b_lo, i32 1
74+
%d = insertelement <8 x i32> %c, i32 %b_hi, i32 5
75+
ret <8 x i32> %d
76+
}
77+
78+
; <8 x float> case: copies the last float of each 128-bit half of %a (elements
; 3 and 7) into element 1 of the same half (elements 1 and 5).
;
; The CHECK lines below are an autogenerated snapshot of the current LASX
; lowering; regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand. In this snapshot each element is extracted to a GPR
; (xvpickve2gr.w), moved into a floating-point register (movgr2fr.w), moved
; back to a GPR (movfr2gr.s) and only then inserted with xvinsgr2vr.w.
define <8 x float> @insert_extract_v8f32(<8 x float> %a) nounwind {
79+
; CHECK-LABEL: insert_extract_v8f32:
80+
; CHECK: # %bb.0: # %entry
81+
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 3
82+
; CHECK-NEXT: movgr2fr.w $fa1, $a0
83+
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7
84+
; CHECK-NEXT: movgr2fr.w $fa2, $a0
85+
; CHECK-NEXT: movfr2gr.s $a0, $fa1
86+
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 1
87+
; CHECK-NEXT: movfr2gr.s $a0, $fa2
88+
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 5
89+
; CHECK-NEXT: ret
90+
entry:
91+
%b_lo = extractelement <8 x float> %a, i32 3
92+
%b_hi = extractelement <8 x float> %a, i32 7
93+
%c = insertelement <8 x float> %a, float %b_lo, i32 1
94+
%d = insertelement <8 x float> %c, float %b_hi, i32 5
95+
ret <8 x float> %d
96+
}
97+
98+
; <4 x i64> case: copies the upper doubleword of each 128-bit half of %a
; (elements 1 and 3) into the lower doubleword of the same half (elements 0
; and 2).
;
; The CHECK lines below are an autogenerated snapshot of the current LASX
; lowering; regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand. In this snapshot each element is moved through a
; general-purpose register with one xvpickve2gr.d / xvinsgr2vr.d pair.
define <4 x i64> @insert_extract_v4i64(<4 x i64> %a) nounwind {
99+
; CHECK-LABEL: insert_extract_v4i64:
100+
; CHECK: # %bb.0: # %entry
101+
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
102+
; CHECK-NEXT: xvpickve2gr.d $a1, $xr0, 3
103+
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 0
104+
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 2
105+
; CHECK-NEXT: ret
106+
entry:
107+
%b_lo = extractelement <4 x i64> %a, i32 1
108+
%b_hi = extractelement <4 x i64> %a, i32 3
109+
%c = insertelement <4 x i64> %a, i64 %b_lo, i32 0
110+
%d = insertelement <4 x i64> %c, i64 %b_hi, i32 2
111+
ret <4 x i64> %d
112+
}
113+
114+
; <4 x double> case: copies the upper double of each 128-bit half of %a
; (elements 1 and 3) into the lower double of the same half (elements 0 and 2).
;
; The CHECK lines below are an autogenerated snapshot of the current LASX
; lowering; regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand. In this snapshot each element is extracted to a GPR
; (xvpickve2gr.d), moved into a floating-point register (movgr2fr.d), moved
; back to a GPR (movfr2gr.d) and only then inserted with xvinsgr2vr.d.
define <4 x double> @insert_extract_v4f64(<4 x double> %a) nounwind {
115+
; CHECK-LABEL: insert_extract_v4f64:
116+
; CHECK: # %bb.0: # %entry
117+
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
118+
; CHECK-NEXT: movgr2fr.d $fa1, $a0
119+
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
120+
; CHECK-NEXT: movgr2fr.d $fa2, $a0
121+
; CHECK-NEXT: movfr2gr.d $a0, $fa1
122+
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 0
123+
; CHECK-NEXT: movfr2gr.d $a0, $fa2
124+
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 2
125+
; CHECK-NEXT: ret
126+
entry:
127+
%b_lo = extractelement <4 x double> %a, i32 1
128+
%b_hi = extractelement <4 x double> %a, i32 3
129+
%c = insertelement <4 x double> %a, double %b_lo, i32 0
130+
%d = insertelement <4 x double> %c, double %b_hi, i32 2
131+
ret <4 x double> %d
132+
}

0 commit comments

Comments
 (0)