Skip to content

Commit 487fa6f

Browse files
committed
[AArch64][DAGCombine] Add performBuildVectorCombine 'extract_elt ~> anyext'
A build vector of two extracted elements is equivalent to an extract subvector where the inner vector is any-extended to the extract_vector_elt VT, because extract_vector_elt has the effect of an any-extend: (build_vector (extract_elt_i16_to_i32 vec Idx+0) (extract_elt_i16_to_i32 vec Idx+1)) => (extract_subvector (anyext_i16_to_i32 vec) Idx). Depends on D130697. Differential Revision: https://reviews.llvm.org/D130698
1 parent 210769f commit 487fa6f

8 files changed

+100
-114
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -888,7 +888,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
888888
setTargetDAGCombine({ISD::ANY_EXTEND, ISD::ZERO_EXTEND, ISD::SIGN_EXTEND,
889889
ISD::VECTOR_SPLICE, ISD::SIGN_EXTEND_INREG,
890890
ISD::CONCAT_VECTORS, ISD::EXTRACT_SUBVECTOR,
891-
ISD::INSERT_SUBVECTOR, ISD::STORE});
891+
ISD::INSERT_SUBVECTOR, ISD::STORE, ISD::BUILD_VECTOR});
892892
if (Subtarget->supportsAddressTopByteIgnored())
893893
setTargetDAGCombine(ISD::LOAD);
894894

@@ -16031,6 +16031,49 @@ static SDValue performVectorAddSubExtCombine(SDNode *N, SelectionDAG &DAG) {
1603116031
return SDValue();
1603216032
}
1603316033

16034+
// Combine a v2i32 BUILD_VECTOR whose two operands are adjacent,
// constant-index EXTRACT_VECTOR_ELTs of the same source vector into an
// EXTRACT_SUBVECTOR of the any-extended source vector:
//   (build_vector (extract_elt_iXX_to_i32 vec Idx+0)
//                 (extract_elt_iXX_to_i32 vec Idx+1))
//     => (extract_subvector (anyext_iXX_to_i32 vec) Idx)
// This is valid because EXTRACT_VECTOR_ELT has the effect of any-extending
// the extracted element to its result VT.
//
// Returns the replacement node, or an empty SDValue when the pattern does not
// match. DCI is accepted for signature parity with the other combines but is
// not consulted here.
static SDValue performBuildVectorCombine(SDNode *N,
                                         TargetLowering::DAGCombinerInfo &DCI,
                                         SelectionDAG &DAG) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // For now, only consider the v2i32 case, which arises as a result of
  // legalization.
  if (VT != MVT::v2i32)
    return SDValue();

  SDValue Elt0 = N->getOperand(0), Elt1 = N->getOperand(1);

  // Both operands must be element extracts.
  if (Elt0->getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
      Elt1->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    return SDValue();

  // Both extracts must use a constant index.
  if (!isa<ConstantSDNode>(Elt0->getOperand(1)) ||
      !isa<ConstantSDNode>(Elt1->getOperand(1)))
    return SDValue();

  // Both extracts must read from the same vector...
  if (Elt0->getOperand(0) != Elt1->getOperand(0))
    return SDValue();

  // ... and be contiguous: first element's index + 1 == second element's
  // index.
  const uint64_t Idx0 = Elt0->getConstantOperandVal(1);
  const uint64_t Idx1 = Elt1->getConstantOperandVal(1);
  if (Idx0 + 1 != Idx1)
    return SDValue();

  // EXTRACT_SUBVECTOR requires that its index be a constant multiple of the
  // result type's known minimum vector length. Reject unaligned starts (for
  // example elements 1 and 2) instead of emitting a node that violates that
  // requirement.
  if (Idx0 % VT.getVectorMinNumElements() != 0)
    return SDValue();

  SDValue VecToExtend = Elt0->getOperand(0);
  EVT ExtVT = VecToExtend.getValueType().changeVectorElementType(MVT::i32);
  // Only proceed when the widened source vector type is legal for the target.
  if (!DAG.getTargetLoweringInfo().isTypeLegal(ExtVT))
    return SDValue();

  SDValue SubvectorIdx = DAG.getVectorIdxConstant(Idx0, DL);

  SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, DL, ExtVT, VecToExtend);
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Ext, SubvectorIdx);
}
16076+
1603416077
static SDValue performAddSubCombine(SDNode *N,
1603516078
TargetLowering::DAGCombinerInfo &DCI,
1603616079
SelectionDAG &DAG) {
@@ -19500,6 +19543,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
1950019543
case ISD::ADD:
1950119544
case ISD::SUB:
1950219545
return performAddSubCombine(N, DCI, DAG);
19546+
case ISD::BUILD_VECTOR:
19547+
return performBuildVectorCombine(N, DCI, DAG);
1950319548
case AArch64ISD::ANDS:
1950419549
return performFlagSettingCombine(N, DCI, ISD::AND);
1950519550
case AArch64ISD::ADC:

llvm/test/CodeGen/AArch64/bitcast-promote-widen.ll

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,7 @@
66
define <2 x i16> @bitcast_v2i16_v2f16(<2 x half> %x) {
77
; CHECK-LABEL: bitcast_v2i16_v2f16:
88
; CHECK: // %bb.0:
9-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
10-
; CHECK-NEXT: umov w8, v0.h[0]
11-
; CHECK-NEXT: umov w9, v0.h[1]
12-
; CHECK-NEXT: fmov s0, w8
13-
; CHECK-NEXT: mov v0.s[1], w9
9+
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
1410
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
1511
; CHECK-NEXT: ret
1612
%y = bitcast <2 x half> %x to <2 x i16>

llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -101,11 +101,8 @@ define void @extract_subvector_v256i8(<256 x i8>* %a, <128 x i8>* %b) vscale_ran
101101
define <2 x i16> @extract_subvector_v4i16(<4 x i16> %op) vscale_range(2,0) #0 {
102102
; CHECK-LABEL: extract_subvector_v4i16:
103103
; CHECK: // %bb.0:
104-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
105-
; CHECK-NEXT: umov w8, v0.h[2]
106-
; CHECK-NEXT: umov w9, v0.h[3]
107-
; CHECK-NEXT: fmov s0, w8
108-
; CHECK-NEXT: mov v0.s[1], w9
104+
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
105+
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
109106
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
110107
; CHECK-NEXT: ret
111108
%ret = call <2 x i16> @llvm.vector.extract.v2i16.v4i16(<4 x i16> %op, i64 2)

llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -575,10 +575,7 @@ define void @masked_gather_v2f16(<2 x half>* %a, <2 x half*>* %b) vscale_range(2
575575
; CHECK-NEXT: movi d0, #0000000000000000
576576
; CHECK-NEXT: ptrue p0.d, vl4
577577
; CHECK-NEXT: fcmeq v1.4h, v1.4h, #0.0
578-
; CHECK-NEXT: umov w8, v1.h[0]
579-
; CHECK-NEXT: umov w9, v1.h[1]
580-
; CHECK-NEXT: fmov s1, w8
581-
; CHECK-NEXT: mov v1.s[1], w9
578+
; CHECK-NEXT: ushll v1.4s, v1.4h, #0
582579
; CHECK-NEXT: shl v1.2s, v1.2s, #16
583580
; CHECK-NEXT: sshr v1.2s, v1.2s, #16
584581
; CHECK-NEXT: fmov w8, s1

llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,7 @@ define <2 x half> @masked_load_v2f16(<2 x half>* %ap, <2 x half>* %bp) vscale_ra
1717
; CHECK-NEXT: ldr s2, [x1]
1818
; CHECK-NEXT: ptrue p0.h, vl4
1919
; CHECK-NEXT: fcmeq v1.4h, v1.4h, v2.4h
20-
; CHECK-NEXT: umov w8, v1.h[0]
21-
; CHECK-NEXT: umov w9, v1.h[1]
22-
; CHECK-NEXT: fmov s1, w8
23-
; CHECK-NEXT: mov v1.s[1], w9
20+
; CHECK-NEXT: ushll v1.4s, v1.4h, #0
2421
; CHECK-NEXT: shl v1.2s, v1.2s, #16
2522
; CHECK-NEXT: sshr v1.2s, v1.2s, #16
2623
; CHECK-NEXT: fmov w8, s1

llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -538,10 +538,7 @@ define void @masked_scatter_v2f16(<2 x half>* %a, <2 x half*>* %b) vscale_range(
538538
; CHECK-NEXT: ptrue p0.d, vl4
539539
; CHECK-NEXT: fcmeq v2.4h, v1.4h, #0.0
540540
; CHECK-NEXT: uunpklo z1.s, z1.h
541-
; CHECK-NEXT: umov w8, v2.h[0]
542-
; CHECK-NEXT: umov w9, v2.h[1]
543-
; CHECK-NEXT: fmov s2, w8
544-
; CHECK-NEXT: mov v2.s[1], w9
541+
; CHECK-NEXT: ushll v2.4s, v2.4h, #0
545542
; CHECK-NEXT: shl v2.2s, v2.2s, #16
546543
; CHECK-NEXT: sshr v2.2s, v2.2s, #16
547544
; CHECK-NEXT: fmov w8, s2

llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,7 @@ define void @masked_store_v2f16(<2 x half>* %ap, <2 x half>* %bp) vscale_range(2
1717
; CHECK-NEXT: ldr s2, [x1]
1818
; CHECK-NEXT: ptrue p0.h, vl4
1919
; CHECK-NEXT: fcmeq v2.4h, v1.4h, v2.4h
20-
; CHECK-NEXT: umov w8, v2.h[0]
21-
; CHECK-NEXT: umov w9, v2.h[1]
22-
; CHECK-NEXT: fmov s2, w8
23-
; CHECK-NEXT: mov v2.s[1], w9
20+
; CHECK-NEXT: ushll v2.4s, v2.4h, #0
2421
; CHECK-NEXT: shl v2.2s, v2.2s, #16
2522
; CHECK-NEXT: sshr v2.2s, v2.2s, #16
2623
; CHECK-NEXT: fmov w8, s2

llvm/test/CodeGen/AArch64/vector-fcvt.ll

Lines changed: 47 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -194,23 +194,16 @@ define <8 x float> @uitofp_i64_float(<8 x i64> %a) {
194194
define <4 x double> @sitofp_v4i8_double(<4 x i8> %a) {
195195
; CHECK-LABEL: sitofp_v4i8_double:
196196
; CHECK: // %bb.0:
197-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
198-
; CHECK-NEXT: umov w8, v0.h[0]
199-
; CHECK-NEXT: umov w9, v0.h[2]
200-
; CHECK-NEXT: umov w10, v0.h[1]
201-
; CHECK-NEXT: fmov s1, w8
202-
; CHECK-NEXT: umov w8, v0.h[3]
203-
; CHECK-NEXT: fmov s0, w9
204-
; CHECK-NEXT: mov v1.s[1], w10
205-
; CHECK-NEXT: mov v0.s[1], w8
206-
; CHECK-NEXT: shl v1.2s, v1.2s, #24
197+
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
198+
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
207199
; CHECK-NEXT: shl v0.2s, v0.2s, #24
208-
; CHECK-NEXT: sshr v1.2s, v1.2s, #24
209200
; CHECK-NEXT: sshr v0.2s, v0.2s, #24
201+
; CHECK-NEXT: shl v1.2s, v1.2s, #24
202+
; CHECK-NEXT: sshll v0.2d, v0.2s, #0
203+
; CHECK-NEXT: sshr v1.2s, v1.2s, #24
204+
; CHECK-NEXT: scvtf v0.2d, v0.2d
210205
; CHECK-NEXT: sshll v1.2d, v1.2s, #0
211-
; CHECK-NEXT: sshll v2.2d, v0.2s, #0
212-
; CHECK-NEXT: scvtf v0.2d, v1.2d
213-
; CHECK-NEXT: scvtf v1.2d, v2.2d
206+
; CHECK-NEXT: scvtf v1.2d, v1.2d
214207
; CHECK-NEXT: ret
215208
%1 = sitofp <4 x i8> %a to <4 x double>
216209
ret <4 x double> %1
@@ -333,39 +326,26 @@ define <16 x double> @sitofp_v16i8_double(<16 x i8> %a) {
333326
define <8 x double> @sitofp_i16_double(<8 x i16> %a) {
334327
; CHECK-LABEL: sitofp_i16_double:
335328
; CHECK: // %bb.0:
336-
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
337-
; CHECK-NEXT: umov w8, v0.h[0]
338-
; CHECK-NEXT: umov w9, v0.h[2]
339-
; CHECK-NEXT: umov w11, v0.h[1]
340-
; CHECK-NEXT: umov w10, v1.h[0]
341-
; CHECK-NEXT: umov w12, v1.h[2]
342-
; CHECK-NEXT: fmov s2, w8
343-
; CHECK-NEXT: umov w8, v0.h[3]
344-
; CHECK-NEXT: fmov s0, w9
345-
; CHECK-NEXT: umov w9, v1.h[1]
346-
; CHECK-NEXT: fmov s3, w10
347-
; CHECK-NEXT: umov w10, v1.h[3]
348-
; CHECK-NEXT: fmov s1, w12
349-
; CHECK-NEXT: mov v0.s[1], w8
350-
; CHECK-NEXT: mov v2.s[1], w11
351-
; CHECK-NEXT: mov v3.s[1], w9
352-
; CHECK-NEXT: mov v1.s[1], w10
353-
; CHECK-NEXT: shl v0.2s, v0.2s, #16
354-
; CHECK-NEXT: shl v2.2s, v2.2s, #16
355-
; CHECK-NEXT: sshr v0.2s, v0.2s, #16
356-
; CHECK-NEXT: shl v3.2s, v3.2s, #16
357-
; CHECK-NEXT: shl v1.2s, v1.2s, #16
329+
; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0
330+
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
331+
; CHECK-NEXT: shl v2.2s, v1.2s, #16
332+
; CHECK-NEXT: shl v3.2s, v0.2s, #16
333+
; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8
358334
; CHECK-NEXT: sshr v2.2s, v2.2s, #16
359-
; CHECK-NEXT: sshll v4.2d, v0.2s, #0
360-
; CHECK-NEXT: sshr v0.2s, v3.2s, #16
361-
; CHECK-NEXT: sshr v1.2s, v1.2s, #16
335+
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
336+
; CHECK-NEXT: sshr v3.2s, v3.2s, #16
362337
; CHECK-NEXT: sshll v2.2d, v2.2s, #0
363-
; CHECK-NEXT: sshll v3.2d, v0.2s, #0
364-
; CHECK-NEXT: sshll v5.2d, v1.2s, #0
365-
; CHECK-NEXT: scvtf v0.2d, v2.2d
366-
; CHECK-NEXT: scvtf v1.2d, v4.2d
367-
; CHECK-NEXT: scvtf v2.2d, v3.2d
368-
; CHECK-NEXT: scvtf v3.2d, v5.2d
338+
; CHECK-NEXT: shl v1.2s, v1.2s, #16
339+
; CHECK-NEXT: shl v0.2s, v0.2s, #16
340+
; CHECK-NEXT: scvtf v2.2d, v2.2d
341+
; CHECK-NEXT: sshr v1.2s, v1.2s, #16
342+
; CHECK-NEXT: sshr v0.2s, v0.2s, #16
343+
; CHECK-NEXT: sshll v3.2d, v3.2s, #0
344+
; CHECK-NEXT: sshll v4.2d, v1.2s, #0
345+
; CHECK-NEXT: sshll v1.2d, v0.2s, #0
346+
; CHECK-NEXT: scvtf v0.2d, v3.2d
347+
; CHECK-NEXT: scvtf v1.2d, v1.2d
348+
; CHECK-NEXT: scvtf v3.2d, v4.2d
369349
; CHECK-NEXT: ret
370350
%1 = sitofp <8 x i16> %a to <8 x double>
371351
ret <8 x double> %1
@@ -402,22 +382,15 @@ define <8 x double> @sitofp_i64_double(<8 x i64> %a) {
402382
define <4 x double> @uitofp_v4i8_double(<4 x i8> %a) {
403383
; CHECK-LABEL: uitofp_v4i8_double:
404384
; CHECK: // %bb.0:
405-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
406-
; CHECK-NEXT: umov w8, v0.h[0]
407-
; CHECK-NEXT: umov w9, v0.h[2]
408-
; CHECK-NEXT: umov w10, v0.h[1]
385+
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
409386
; CHECK-NEXT: movi d1, #0x0000ff000000ff
410-
; CHECK-NEXT: fmov s2, w8
411-
; CHECK-NEXT: umov w8, v0.h[3]
412-
; CHECK-NEXT: fmov s0, w9
413-
; CHECK-NEXT: mov v2.s[1], w10
414-
; CHECK-NEXT: mov v0.s[1], w8
415-
; CHECK-NEXT: and v2.8b, v2.8b, v1.8b
387+
; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8
416388
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
417-
; CHECK-NEXT: ushll v1.2d, v2.2s, #0
418-
; CHECK-NEXT: ushll v2.2d, v0.2s, #0
419-
; CHECK-NEXT: ucvtf v0.2d, v1.2d
420-
; CHECK-NEXT: ucvtf v1.2d, v2.2d
389+
; CHECK-NEXT: ushll v0.2d, v0.2s, #0
390+
; CHECK-NEXT: and v1.8b, v2.8b, v1.8b
391+
; CHECK-NEXT: ushll v1.2d, v1.2s, #0
392+
; CHECK-NEXT: ucvtf v0.2d, v0.2d
393+
; CHECK-NEXT: ucvtf v1.2d, v1.2d
421394
; CHECK-NEXT: ret
422395
%1 = uitofp <4 x i8> %a to <4 x double>
423396
ret <4 x double> %1
@@ -530,36 +503,23 @@ define <16 x double> @uitofp_v16i8_double(<16 x i8> %a) {
530503
define <8 x double> @uitofp_i16_double(<8 x i16> %a) {
531504
; CHECK-LABEL: uitofp_i16_double:
532505
; CHECK: // %bb.0:
533-
; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8
534-
; CHECK-NEXT: umov w8, v0.h[0]
535-
; CHECK-NEXT: umov w9, v0.h[2]
536-
; CHECK-NEXT: umov w11, v0.h[1]
537506
; CHECK-NEXT: movi d1, #0x00ffff0000ffff
538-
; CHECK-NEXT: umov w10, v2.h[0]
539-
; CHECK-NEXT: umov w12, v2.h[2]
540-
; CHECK-NEXT: fmov s3, w8
541-
; CHECK-NEXT: umov w8, v0.h[3]
542-
; CHECK-NEXT: fmov s0, w9
543-
; CHECK-NEXT: umov w9, v2.h[1]
544-
; CHECK-NEXT: fmov s4, w10
545-
; CHECK-NEXT: umov w10, v2.h[3]
546-
; CHECK-NEXT: fmov s2, w12
547-
; CHECK-NEXT: mov v0.s[1], w8
548-
; CHECK-NEXT: mov v3.s[1], w11
549-
; CHECK-NEXT: mov v4.s[1], w9
550-
; CHECK-NEXT: mov v2.s[1], w10
551-
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
552-
; CHECK-NEXT: and v3.8b, v3.8b, v1.8b
553-
; CHECK-NEXT: ushll v5.2d, v0.2s, #0
554-
; CHECK-NEXT: and v0.8b, v4.8b, v1.8b
555-
; CHECK-NEXT: and v1.8b, v2.8b, v1.8b
507+
; CHECK-NEXT: ushll2 v2.4s, v0.8h, #0
508+
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
509+
; CHECK-NEXT: and v3.8b, v2.8b, v1.8b
510+
; CHECK-NEXT: and v4.8b, v0.8b, v1.8b
511+
; CHECK-NEXT: ext v2.16b, v2.16b, v2.16b, #8
512+
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
556513
; CHECK-NEXT: ushll v3.2d, v3.2s, #0
557-
; CHECK-NEXT: ushll v2.2d, v0.2s, #0
558-
; CHECK-NEXT: ushll v4.2d, v1.2s, #0
559-
; CHECK-NEXT: ucvtf v0.2d, v3.2d
560-
; CHECK-NEXT: ucvtf v1.2d, v5.2d
561-
; CHECK-NEXT: ucvtf v2.2d, v2.2d
562-
; CHECK-NEXT: ucvtf v3.2d, v4.2d
514+
; CHECK-NEXT: ushll v4.2d, v4.2s, #0
515+
; CHECK-NEXT: and v2.8b, v2.8b, v1.8b
516+
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
517+
; CHECK-NEXT: ushll v5.2d, v2.2s, #0
518+
; CHECK-NEXT: ucvtf v2.2d, v3.2d
519+
; CHECK-NEXT: ushll v1.2d, v0.2s, #0
520+
; CHECK-NEXT: ucvtf v0.2d, v4.2d
521+
; CHECK-NEXT: ucvtf v1.2d, v1.2d
522+
; CHECK-NEXT: ucvtf v3.2d, v5.2d
563523
; CHECK-NEXT: ret
564524
%1 = uitofp <8 x i16> %a to <8 x double>
565525
ret <8 x double> %1

0 commit comments

Comments
 (0)