Skip to content

Commit 88099a7

Browse files
authored
[LoongArch] Custom lower vector trunc to vector shuffle (llvm#130938)
1 parent 3c0300d commit 88099a7

File tree

2 files changed

+66
-84
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 44 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -297,6 +297,12 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
297297
setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
298298
setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
299299
setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
300+
301+
for (MVT VT :
302+
{MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
303+
MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
304+
setOperationAction(ISD::TRUNCATE, VT, Custom);
305+
}
300306
}
301307

302308
// Set operations for 'LASX' feature.
@@ -3394,6 +3400,44 @@ void LoongArchTargetLowering::ReplaceNodeResults(
33943400
replaceCMP_XCHG_128Results(N, Results, DAG);
33953401
break;
33963402
}
3403+
case ISD::TRUNCATE: {
3404+
MVT VT = N->getSimpleValueType(0);
3405+
if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
3406+
return;
3407+
3408+
MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
3409+
SDValue In = N->getOperand(0);
3410+
EVT InVT = In.getValueType();
3411+
EVT InEltVT = InVT.getVectorElementType();
3412+
EVT EltVT = VT.getVectorElementType();
3413+
unsigned MinElts = VT.getVectorNumElements();
3414+
unsigned WidenNumElts = WidenVT.getVectorNumElements();
3415+
unsigned InBits = InVT.getSizeInBits();
3416+
3417+
if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
3418+
if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
3419+
int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
3420+
SmallVector<int, 16> TruncMask(WidenNumElts, -1);
3421+
for (unsigned I = 0; I < MinElts; ++I)
3422+
TruncMask[I] = Scale * I;
3423+
3424+
unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
3425+
MVT SVT = In.getSimpleValueType().getScalarType();
3426+
MVT VT = MVT::getVectorVT(SVT, WidenNumElts);
3427+
SDValue WidenIn =
3428+
DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), In,
3429+
DAG.getVectorIdxConstant(0, DL));
3430+
assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
3431+
"Illegal vector type in truncation");
3432+
WidenIn = DAG.getBitcast(WidenVT, WidenIn);
3433+
Results.push_back(
3434+
DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
3435+
return;
3436+
}
3437+
}
3438+
3439+
break;
3440+
}
33973441
}
33983442
}
33993443

llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll

Lines changed: 22 additions & 84 deletions
Original file line number | Diff line number | Diff line change
@@ -5,11 +5,7 @@ define void @load_trunc_2i64_to_2i32(ptr %ptr, ptr %dst) nounwind {
55
; CHECK-LABEL: load_trunc_2i64_to_2i32:
66
; CHECK: # %bb.0:
77
; CHECK-NEXT: vld $vr0, $a0, 0
8-
; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
9-
; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0
10-
; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1
11-
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
12-
; CHECK-NEXT: vpackev.w $vr0, $vr0, $vr1
8+
; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 8
139
; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
1410
; CHECK-NEXT: st.d $a0, $a1, 0
1511
; CHECK-NEXT: ret
@@ -23,12 +19,10 @@ define void @load_trunc_2i64_to_2i16(ptr %ptr, ptr %dst) nounwind {
2319
; CHECK-LABEL: load_trunc_2i64_to_2i16:
2420
; CHECK: # %bb.0:
2521
; CHECK-NEXT: vld $vr0, $a0, 0
26-
; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
27-
; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 0
28-
; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1
29-
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
30-
; CHECK-NEXT: vpackev.h $vr0, $vr0, $vr1
31-
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
22+
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
23+
; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI1_0)
24+
; CHECK-NEXT: vshuf.h $vr1, $vr0, $vr0
25+
; CHECK-NEXT: vpickve2gr.w $a0, $vr1, 0
3226
; CHECK-NEXT: st.w $a0, $a1, 0
3327
; CHECK-NEXT: ret
3428
%a = load <2 x i64>, ptr %ptr
@@ -41,11 +35,9 @@ define void @load_trunc_2i64_to_2i8(ptr %ptr, ptr %dst) nounwind {
4135
; CHECK-LABEL: load_trunc_2i64_to_2i8:
4236
; CHECK: # %bb.0:
4337
; CHECK-NEXT: vld $vr0, $a0, 0
44-
; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
45-
; CHECK-NEXT: vinsgr2vr.b $vr1, $a0, 0
46-
; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1
47-
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 0
48-
; CHECK-NEXT: vpackev.b $vr0, $vr0, $vr1
38+
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
39+
; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI2_0)
40+
; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
4941
; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
5042
; CHECK-NEXT: st.h $a0, $a1, 0
5143
; CHECK-NEXT: ret
@@ -58,19 +50,10 @@ define void @load_trunc_2i64_to_2i8(ptr %ptr, ptr %dst) nounwind {
5850
define void @load_trunc_4i32_to_4i16(ptr %ptr, ptr %dst) nounwind {
5951
; CHECK-LABEL: load_trunc_4i32_to_4i16:
6052
; CHECK: # %bb.0:
61-
; CHECK-NEXT: addi.d $sp, $sp, -16
6253
; CHECK-NEXT: vld $vr0, $a0, 0
63-
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 3
64-
; CHECK-NEXT: st.h $a0, $sp, 6
65-
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 2
66-
; CHECK-NEXT: st.h $a0, $sp, 4
67-
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1
68-
; CHECK-NEXT: st.h $a0, $sp, 2
69-
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
70-
; CHECK-NEXT: st.h $a0, $sp, 0
71-
; CHECK-NEXT: ld.d $a0, $sp, 0
54+
; CHECK-NEXT: vpickev.h $vr0, $vr0, $vr0
55+
; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
7256
; CHECK-NEXT: st.d $a0, $a1, 0
73-
; CHECK-NEXT: addi.d $sp, $sp, 16
7457
; CHECK-NEXT: ret
7558
%a = load <4 x i32>, ptr %ptr
7659
%trunc = trunc <4 x i32> %a to <4 x i16>
@@ -81,20 +64,12 @@ define void @load_trunc_4i32_to_4i16(ptr %ptr, ptr %dst) nounwind {
8164
define void @load_trunc_4i32_to_4i8(ptr %ptr, ptr %dst) nounwind {
8265
; CHECK-LABEL: load_trunc_4i32_to_4i8:
8366
; CHECK: # %bb.0:
84-
; CHECK-NEXT: addi.d $sp, $sp, -16
8567
; CHECK-NEXT: vld $vr0, $a0, 0
86-
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 3
87-
; CHECK-NEXT: st.b $a0, $sp, 3
88-
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 2
89-
; CHECK-NEXT: st.b $a0, $sp, 2
90-
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1
91-
; CHECK-NEXT: st.b $a0, $sp, 1
92-
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
93-
; CHECK-NEXT: st.b $a0, $sp, 0
94-
; CHECK-NEXT: vld $vr0, $sp, 0
68+
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0)
69+
; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI4_0)
70+
; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
9571
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
9672
; CHECK-NEXT: st.w $a0, $a1, 0
97-
; CHECK-NEXT: addi.d $sp, $sp, 16
9873
; CHECK-NEXT: ret
9974
%a = load <4 x i32>, ptr %ptr
10075
%trunc = trunc <4 x i32> %a to <4 x i8>
@@ -105,27 +80,10 @@ define void @load_trunc_4i32_to_4i8(ptr %ptr, ptr %dst) nounwind {
10580
define void @load_trunc_8i16_to_8i8(ptr %ptr, ptr %dst) nounwind {
10681
; CHECK-LABEL: load_trunc_8i16_to_8i8:
10782
; CHECK: # %bb.0:
108-
; CHECK-NEXT: addi.d $sp, $sp, -16
10983
; CHECK-NEXT: vld $vr0, $a0, 0
110-
; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7
111-
; CHECK-NEXT: st.b $a0, $sp, 7
112-
; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 6
113-
; CHECK-NEXT: st.b $a0, $sp, 6
114-
; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 5
115-
; CHECK-NEXT: st.b $a0, $sp, 5
116-
; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 4
117-
; CHECK-NEXT: st.b $a0, $sp, 4
118-
; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 3
119-
; CHECK-NEXT: st.b $a0, $sp, 3
120-
; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 2
121-
; CHECK-NEXT: st.b $a0, $sp, 2
122-
; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1
123-
; CHECK-NEXT: st.b $a0, $sp, 1
124-
; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
125-
; CHECK-NEXT: st.b $a0, $sp, 0
126-
; CHECK-NEXT: ld.d $a0, $sp, 0
84+
; CHECK-NEXT: vpickev.b $vr0, $vr0, $vr0
85+
; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
12786
; CHECK-NEXT: st.d $a0, $a1, 0
128-
; CHECK-NEXT: addi.d $sp, $sp, 16
12987
; CHECK-NEXT: ret
13088
%a = load <8 x i16>, ptr %ptr
13189
%trunc = trunc <8 x i16> %a to <8 x i8>
@@ -138,11 +96,7 @@ define void @load_trunc_2i32_to_2i16(ptr %ptr, ptr %dst) nounwind {
13896
; CHECK: # %bb.0:
13997
; CHECK-NEXT: ld.d $a0, $a0, 0
14098
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
141-
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
142-
; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 0
143-
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1
144-
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
145-
; CHECK-NEXT: vpackev.h $vr0, $vr0, $vr1
99+
; CHECK-NEXT: vshuf4i.h $vr0, $vr0, 8
146100
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
147101
; CHECK-NEXT: st.w $a0, $a1, 0
148102
; CHECK-NEXT: ret
@@ -156,12 +110,10 @@ define void @load_trunc_2i32_to_2i8(ptr %ptr, ptr %dst) nounwind {
156110
; CHECK-LABEL: load_trunc_2i32_to_2i8:
157111
; CHECK: # %bb.0:
158112
; CHECK-NEXT: ld.d $a0, $a0, 0
159-
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
160-
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
161-
; CHECK-NEXT: vinsgr2vr.b $vr1, $a0, 0
162-
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1
163-
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 0
164-
; CHECK-NEXT: vpackev.b $vr0, $vr0, $vr1
113+
; CHECK-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_0)
114+
; CHECK-NEXT: vld $vr0, $a2, %pc_lo12(.LCPI7_0)
115+
; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0
116+
; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr1, $vr0
165117
; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
166118
; CHECK-NEXT: st.h $a0, $a1, 0
167119
; CHECK-NEXT: ret
@@ -174,21 +126,11 @@ define void @load_trunc_2i32_to_2i8(ptr %ptr, ptr %dst) nounwind {
174126
define void @load_trunc_4i16_to_4i8(ptr %ptr, ptr %dst) nounwind {
175127
; CHECK-LABEL: load_trunc_4i16_to_4i8:
176128
; CHECK: # %bb.0:
177-
; CHECK-NEXT: addi.d $sp, $sp, -16
178129
; CHECK-NEXT: ld.d $a0, $a0, 0
179130
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
180-
; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 3
181-
; CHECK-NEXT: st.b $a0, $sp, 3
182-
; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 2
183-
; CHECK-NEXT: st.b $a0, $sp, 2
184-
; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1
185-
; CHECK-NEXT: st.b $a0, $sp, 1
186-
; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
187-
; CHECK-NEXT: st.b $a0, $sp, 0
188-
; CHECK-NEXT: vld $vr0, $sp, 0
131+
; CHECK-NEXT: vpickev.b $vr0, $vr0, $vr0
189132
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
190133
; CHECK-NEXT: st.w $a0, $a1, 0
191-
; CHECK-NEXT: addi.d $sp, $sp, 16
192134
; CHECK-NEXT: ret
193135
%a = load <4 x i16>, ptr %ptr
194136
%trunc = trunc <4 x i16> %a to <4 x i8>
@@ -201,11 +143,7 @@ define void @load_trunc_2i16_to_2i8(ptr %ptr, ptr %dst) nounwind {
201143
; CHECK: # %bb.0:
202144
; CHECK-NEXT: ld.w $a0, $a0, 0
203145
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
204-
; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
205-
; CHECK-NEXT: vinsgr2vr.b $vr1, $a0, 0
206-
; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1
207-
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 0
208-
; CHECK-NEXT: vpackev.b $vr0, $vr0, $vr1
146+
; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 8
209147
; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
210148
; CHECK-NEXT: st.h $a0, $a1, 0
211149
; CHECK-NEXT: ret

0 commit comments

Comments
 (0)