diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 7dae4d30d31be..addb0e056ff9c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -289,6 +289,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::SETCC, VT, Legal);
       setOperationAction(ISD::VSELECT, VT, Legal);
       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
     }
     for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
       setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
@@ -350,7 +351,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
       setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
-      setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
+      setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
       setOperationAction(ISD::SETCC, VT, Legal);
       setOperationAction(ISD::VSELECT, VT, Legal);
 
@@ -497,6 +499,8 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
     return lowerEXTRACT_VECTOR_ELT(Op, DAG);
   case ISD::BUILD_VECTOR:
     return lowerBUILD_VECTOR(Op, DAG);
+  case ISD::CONCAT_VECTORS:
+    return lowerCONCAT_VECTORS(Op, DAG);
   case ISD::VECTOR_SHUFFLE:
     return lowerVECTOR_SHUFFLE(Op, DAG);
   case ISD::BITREVERSE:
@@ -2520,6 +2524,72 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
   return SDValue();
 }
 
+SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
+                                                     SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  MVT ResVT = Op.getSimpleValueType();
+  assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);
+
+  unsigned NumOperands = Op.getNumOperands();
+  unsigned NumFreezeUndef = 0;
+  unsigned NumZero = 0;
+  unsigned NumNonZero = 0;
+  unsigned NonZeros = 0;
+  SmallSet<SDValue, 2> Undefs;
+  for (unsigned i = 0; i != NumOperands; ++i) {
+    SDValue SubVec = Op.getOperand(i);
+    if (SubVec.isUndef())
+      continue;
+    if (ISD::isFreezeUndef(SubVec.getNode())) {
+      // If the freeze(undef) has multiple uses then we must fold to zero.
+      if (SubVec.hasOneUse()) {
+        ++NumFreezeUndef;
+      } else {
+        ++NumZero;
+        Undefs.insert(SubVec);
+      }
+    } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
+      ++NumZero;
+    else {
+      assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
+      NonZeros |= 1 << i;
+      ++NumNonZero;
+    }
+  }
+
+  // If we have more than 2 non-zeros, build each half separately.
+  if (NumNonZero > 2) {
+    MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
+    ArrayRef<SDUse> Ops = Op->ops();
+    SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
+                             Ops.slice(0, NumOperands / 2));
+    SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
+                             Ops.slice(NumOperands / 2));
+    return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
+  }
+
+  // Otherwise, build it up through insert_subvectors.
+  SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT)
+                        : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
+                                          : DAG.getUNDEF(ResVT));
+
+  // Replace Undef operands with ZeroVector.
+  for (SDValue U : Undefs)
+    DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType()));
+
+  MVT SubVT = Op.getOperand(0).getSimpleValueType();
+  unsigned NumSubElems = SubVT.getVectorNumElements();
+  for (unsigned i = 0; i != NumOperands; ++i) {
+    if ((NonZeros & (1 << i)) == 0)
+      continue;
+
+    Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i),
+                      DAG.getVectorIdxConstant(i * NumSubElems, DL));
+  }
+
+  return Vec;
+}
+
 SDValue LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                          SelectionDAG &DAG) const {
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 60dc2b385a75c..6b49a98f3ae46 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -376,6 +376,7 @@ class LoongArchTargetLowering : public TargetLowering {
   SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerBITREVERSE(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index ff7b0f2ae3f25..95e9fd49d1c0d 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1860,12 +1860,6 @@ def : Pat<(v4i32(fp_to_uint v4f64:$vj)),
                             (XVFTINTRZ_LU_D v4f64:$vj)),
               sub_128)>;
 
-// XVPERMI_Q
-foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in
-def : Pat<(vt (concat_vectors LSX128:$vd, LSX128:$vj)),
-          (XVPERMI_Q (SUBREG_TO_REG (i64 0), LSX128:$vd, sub_128),
-                     (SUBREG_TO_REG (i64 0), LSX128:$vj, sub_128), 2)>;
-
 // XVABSD_{B/H/W/D}[U]
 defm : PatXrXr<abds, "XVABSD">;
 defm : PatXrXrU<abdu, "XVABSD">;
@@ -1879,6 +1873,35 @@ def : Pat<(loongarch_xvmskgez (v32i8 LASX256:$vj)), (PseudoXVMSKGEZ_B LASX256:$v
 def : Pat<(loongarch_xvmskeqz (v32i8 LASX256:$vj)), (PseudoXVMSKEQZ_B LASX256:$vj)>;
 def : Pat<(loongarch_xvmsknez (v32i8 LASX256:$vj)), (PseudoXVMSKNEZ_B LASX256:$vj)>;
 
+// Subvector tricks
+// Patterns for insert_subvector/extract_subvector
+multiclass subvector_subreg_lowering<RegisterClass subRC, ValueType subVT,
+                                     RegisterClass RC, ValueType VT,
+                                     int hiIdx, SubRegIndex subIdx> {
+  // A 128-bit subvector extract from the first 256-bit vector position is a
+  // subregister copy that needs no instruction. Likewise, a 128-bit subvector
+  // insert to the first 256-bit vector position is a subregister copy that needs
+  // no instruction.
+  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
+            (subVT (EXTRACT_SUBREG RC:$src, subIdx))>;
+  def : Pat<(VT (insert_subvector undef_or_freeze_undef, subRC:$src, (iPTR 0))),
+            (VT (INSERT_SUBREG (IMPLICIT_DEF), subRC:$src, subIdx))>;
+
+  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR hiIdx))),
+            (subVT (EXTRACT_SUBREG (XVPERMI_Q (IMPLICIT_DEF), RC:$src, 1), subIdx))>;
+  def : Pat<(VT (insert_subvector RC:$vd, subRC:$vj, (iPTR 0))),
+            (VT (XVPERMI_Q RC:$vd, (INSERT_SUBREG (IMPLICIT_DEF), subRC:$vj, subIdx), 48))>;
+  def : Pat<(VT (insert_subvector RC:$vd, subRC:$vj, (iPTR hiIdx))),
+            (VT (XVPERMI_Q RC:$vd, (INSERT_SUBREG (IMPLICIT_DEF), subRC:$vj, subIdx), 2))>;
+}
+
+defm : subvector_subreg_lowering<LSX128, v16i8, LASX256, v32i8, 16, sub_128>;
+defm : subvector_subreg_lowering<LSX128, v8i16, LASX256, v16i16, 8, sub_128>;
+defm : subvector_subreg_lowering<LSX128, v4i32, LASX256, v8i32, 4, sub_128>;
+defm : subvector_subreg_lowering<LSX128, v2i64, LASX256, v4i64, 2, sub_128>;
+defm : subvector_subreg_lowering<LSX128, v4f32, LASX256, v8f32, 4, sub_128>;
+defm : subvector_subreg_lowering<LSX128, v2f64, LASX256, v4f64, 2, sub_128>;
+
 } // Predicates = [HasExtLASX]
 
 /// Intrinsic pattern
diff --git a/llvm/test/CodeGen/LoongArch/lasx/concat-vectors.ll b/llvm/test/CodeGen/LoongArch/lasx/concat-vectors.ll
new file mode 100644
index 0000000000000..231e82a6d53ac
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/concat-vectors.ll
@@ -0,0 +1,218 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+define <32 x i8> @concat_poison_v32i8_1(<16 x i8> %a) {
+; CHECK-LABEL: concat_poison_v32i8_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <16 x i8> %a, <16 x i8> poison,
+                     <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  ret <32 x i8> %1
+}
+
+define <32 x i8> @concat_poison_v32i8_2(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: concat_poison_v32i8_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vori.b $vr0, $vr1, 0
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <16 x i8> %b, <16 x i8> poison,
+                     <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  ret <32 x i8> %1
+}
+
+define <32 x i8> @concat_vectors_v32i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: concat_vectors_v32i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <16 x i8> %a, <16 x i8> %b,
+                     <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  ret <32 x i8> %1
+}
+
+define <16 x i16> @concat_poison_v16i16_1(<8 x i16> %a) {
+; CHECK-LABEL: concat_poison_v16i16_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <8 x i16> %a, <8 x i16> poison,
+                     <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @concat_poison_v16i16_2(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: concat_poison_v16i16_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vori.b $vr0, $vr1, 0
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <8 x i16> %b, <8 x i16> poison,
+                     <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @concat_vectors_v16i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: concat_vectors_v16i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <8 x i16> %a, <8 x i16> %b,
+                     <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x i16> %1
+}
+
+define <8 x i32> @concat_poison_v8i32_1(<4 x i32> %a) {
+; CHECK-LABEL: concat_poison_v8i32_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 def $xr0
CHECK-NEXT: ret +entry: + %1 = shufflevector <4 x i32> %a, <4 x i32> poison, + <8 x i32> + ret <8 x i32> %1 +} + +define <8 x i32> @concat_poison_v8i32_2(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: concat_poison_v8i32_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vori.b $vr0, $vr1, 0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <4 x i32> %b, <4 x i32> poison, + <8 x i32> + ret <8 x i32> %1 +} + +define <8 x i32> @concat_vectors_v8i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: concat_vectors_v8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <4 x i32> %a, <4 x i32> %b, + <8 x i32> + ret <8 x i32> %1 +} + +define <8 x float> @concat_poison_v8f32_1(<4 x float> %a) { +; CHECK-LABEL: concat_poison_v8f32_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <4 x float> %a, <4 x float> poison, + <8 x i32> + ret <8 x float> %1 +} + +define <8 x float> @concat_poison_v8f32_2(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: concat_poison_v8f32_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vori.b $vr0, $vr1, 0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <4 x float> %b, <4 x float> poison, + <8 x i32> + ret <8 x float> %1 +} + +define <8 x float> @concat_vectors_v8f32(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: concat_vectors_v8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <4 x float> %a, <4 x float> %b, + <8 x i32> + ret <8 x float> %1 +} + +define <4 x i64> @concat_poison_v8i64_1(<2 x i64> %a) { +; CHECK-LABEL: concat_poison_v8i64_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <2 x i64> %a, <2 x i64> poison, <4 x i32> + ret <4 x i64> %1 +} + +define <4 x i64> @concat_poison_v8i64_2(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: concat_poison_v8i64_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vori.b $vr0, $vr1, 0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <2 x i64> %b, <2 x i64> poison, <4 x i32> + ret <4 x i64> %1 +} + +define <4 x i64> @concat_vectors_v8i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: concat_vectors_v8i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> + ret <4 x i64> %1 +} + +define <4 x double> @concat_poison_v8f64_1(<2 x double> %a) { +; CHECK-LABEL: concat_poison_v8f64_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <2 x double> %a, <2 x double> poison, <4 x i32> + ret <4 x double> %1 +} + +define <4 x double> @concat_poison_v8f64_2(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: concat_poison_v8f64_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vori.b $vr0, $vr1, 0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <2 x double> %b, <2 x double> poison, <4 x i32> + ret <4 x double> %1 +} + +define <4 x double> @concat_vectors_v8f64(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: concat_vectors_v8f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # 
kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <2 x double> %a, <2 x double> %b, <4 x i32> + ret <4 x double> %1 +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/insert-extract-subvector.ll b/llvm/test/CodeGen/LoongArch/lasx/insert-extract-subvector.ll new file mode 100644 index 0000000000000..7a90afca376db --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/insert-extract-subvector.ll @@ -0,0 +1,668 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32>, <4 x i32>, i64) + +define <8 x i32> @insert_lo128_v8i32_1(<4 x i32> %a) { +; CHECK-LABEL: insert_lo128_v8i32_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: ret +entry: + %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> %a, i64 0) + ret <8 x i32> %1 +} + +define <8 x i32> @insert_hi128_v8i32_1(<4 x i32> %a) { +; CHECK-LABEL: insert_hi128_v8i32_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 2 +; CHECK-NEXT: ret +entry: + %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> %a, i64 4) + ret <8 x i32> %1 +} + +define <8 x i32> @insert_lo128_v8i32_2(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: insert_lo128_v8i32_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vori.b $vr0, $vr1, 0 +; CHECK-NEXT: ret +entry: + %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> %b, i64 0) + ret <8 x i32> %1 +} + +define <8 x i32> @insert_hi128_v8i32_2(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: insert_hi128_v8i32_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ret +entry: + %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> %b, i64 4) + ret <8 x i32> %1 +} + +define <8 x i32> @insert_lo128_v8i32_3(<8 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: insert_lo128_v8i32_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48 +; CHECK-NEXT: ret +entry: + %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %a, <4 x i32> %b, i64 0) + ret <8 x i32> %1 +} + +define <8 x i32> @insert_hi128_v8i32_3(<8 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: insert_hi128_v8i32_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ret +entry: + %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %a, <4 x i32> %b, i64 4) + ret <8 x i32> %1 +} + +declare <8 x float> @llvm.experimental.vector.insert.v8f32.v4f32(<8 x float>, <4 x float>, i64) + +define <8 x float> @insert_lo128_v8f32_1(<4 x float> %a) { +; CHECK-LABEL: insert_lo128_v8f32_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: ret +entry: + %1 = call <8 x float> @llvm.experimental.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> %a, i64 0) + ret <8 x float> %1 +} + +define <8 x float> @insert_hi128_v8f32_1(<4 x float> %a) { +; CHECK-LABEL: insert_hi128_v8f32_1: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 2 +; CHECK-NEXT: ret +entry: + %1 = call <8 x float> @llvm.experimental.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> %a, i64 4) + ret <8 x float> %1 +} + +define <8 x float> @insert_lo128_v8f32_2(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: insert_lo128_v8f32_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vori.b $vr0, $vr1, 0 +; CHECK-NEXT: ret +entry: + %1 = call <8 x float> @llvm.experimental.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> %b, i64 0) + ret <8 x float> %1 +} + +define <8 x float> @insert_hi128_v8f32_2(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: insert_hi128_v8f32_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ret +entry: + %1 = call <8 x float> @llvm.experimental.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> %b, i64 4) + ret <8 x float> %1 +} + +define <8 x float> @insert_lo128_v8f32_3(<8 x float> %a, <4 x float> %b) { +; CHECK-LABEL: insert_lo128_v8f32_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48 +; CHECK-NEXT: ret +entry: + %1 = call <8 x float> @llvm.experimental.vector.insert.v8f32.v4f32(<8 x float> %a, <4 x float> %b, i64 0) + ret <8 x float> %1 +} + +define <8 x float> @insert_hi128_v8f32_3(<8 x float> %a, <4 x float> %b) { +; CHECK-LABEL: insert_hi128_v8f32_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ret +entry: + %1 = call <8 x float> @llvm.experimental.vector.insert.v8f32.v4f32(<8 x float> %a, <4 x float> %b, i64 4) + ret <8 x float> %1 +} + +declare <4 x i64> @llvm.experimental.vector.insert.v4i64.v2i64(<4 x i64>, <2 x i64>, i64) + +define <4 x i64> @insert_lo128_v4i64_1(<2 x i64> %a) { +; CHECK-LABEL: insert_lo128_v4i64_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: ret +entry: + %1 = call <4 x i64> @llvm.experimental.vector.insert.v4i64.v2i64(<4 x i64> poison, <2 x i64> %a, i64 0) + ret <4 x i64> %1 +} + +define <4 x i64> @insert_hi128_v4i64_1(<2 x i64> %a) { +; CHECK-LABEL: insert_hi128_v4i64_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 2 +; CHECK-NEXT: ret +entry: + %1 = call <4 x i64> @llvm.experimental.vector.insert.v4i64.v2i64(<4 x i64> poison, <2 x i64> %a, i64 2) + ret <4 x i64> %1 +} + +define <4 x i64> @insert_lo128_v4i64_2(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: insert_lo128_v4i64_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vori.b $vr0, $vr1, 0 +; CHECK-NEXT: ret +entry: + %1 = call <4 x i64> @llvm.experimental.vector.insert.v4i64.v2i64(<4 x i64> poison, <2 x i64> %b, i64 0) + ret <4 x i64> %1 +} + +define <4 x i64> @insert_hi128_v4i64_2(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: insert_hi128_v4i64_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ret +entry: + %1 = call <4 x i64> @llvm.experimental.vector.insert.v4i64.v2i64(<4 x i64> poison, <2 x i64> %b, i64 2) + ret <4 x i64> %1 +} + +define <4 x i64> @insert_lo128_v4i64_3(<4 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: insert_lo128_v4i64_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48 +; CHECK-NEXT: ret +entry: + 
%1 = call <4 x i64> @llvm.experimental.vector.insert.v4i64.v2i64(<4 x i64> %a, <2 x i64> %b, i64 0) + ret <4 x i64> %1 +} + +define <4 x i64> @insert_hi128_v4i64_3(<4 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: insert_hi128_v4i64_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ret +entry: + %1 = call <4 x i64> @llvm.experimental.vector.insert.v4i64.v2i64(<4 x i64> %a, <2 x i64> %b, i64 2) + ret <4 x i64> %1 +} + +declare <4 x double> @llvm.experimental.vector.insert.v4f64.v2f64(<4 x double>, <2 x double>, i64) + +define <4 x double> @insert_lo128_v4f64_1(<2 x double> %a) { +; CHECK-LABEL: insert_lo128_v4f64_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: ret +entry: + %1 = call <4 x double> @llvm.experimental.vector.insert.v4f64.v2f64(<4 x double> poison, <2 x double> %a, i64 0) + ret <4 x double> %1 +} + +define <4 x double> @insert_hi128_v4f64_1(<2 x double> %a) { +; CHECK-LABEL: insert_hi128_v4f64_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 2 +; CHECK-NEXT: ret +entry: + %1 = call <4 x double> @llvm.experimental.vector.insert.v4f64.v2f64(<4 x double> poison, <2 x double> %a, i64 2) + ret <4 x double> %1 +} + +define <4 x double> @insert_lo128_v4f64_2(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: insert_lo128_v4f64_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vori.b $vr0, $vr1, 0 +; CHECK-NEXT: ret +entry: + %1 = call <4 x double> @llvm.experimental.vector.insert.v4f64.v2f64(<4 x double> poison, <2 x double> %b, i64 0) + ret <4 x double> %1 +} + +define <4 x double> @insert_hi128_v4f64_2(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: insert_hi128_v4f64_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ret +entry: + %1 = call <4 x double> @llvm.experimental.vector.insert.v4f64.v2f64(<4 x double> poison, <2 x double> %b, i64 2) + ret <4 x double> %1 +} + +define <4 x double> @insert_lo128_v4f64_3(<4 x double> %a, <2 x double> %b) { +; CHECK-LABEL: insert_lo128_v4f64_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48 +; CHECK-NEXT: ret +entry: + %1 = call <4 x double> @llvm.experimental.vector.insert.v4f64.v2f64(<4 x double> %a, <2 x double> %b, i64 0) + ret <4 x double> %1 +} + +define <4 x double> @insert_hi128_v4f64_3(<4 x double> %a, <2 x double> %b) { +; CHECK-LABEL: insert_hi128_v4f64_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ret +entry: + %1 = call <4 x double> @llvm.experimental.vector.insert.v4f64.v2f64(<4 x double> %a, <2 x double> %b, i64 2) + ret <4 x double> %1 +} + +declare <16 x i16> @llvm.experimental.vector.insert.v16i16.v8i16(<16 x i16>, <8 x i16>, i64) + +define <16 x i16> @insert_lo128_v16i16_1(<8 x i16> %a) { +; CHECK-LABEL: insert_lo128_v16i16_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: ret +entry: + %1 = call <16 x i16> @llvm.experimental.vector.insert.v16i16.v8i16(<16 x i16> poison, <8 x i16> %a, i64 0) + ret <16 x i16> %1 +} + +define <16 x i16> @insert_hi128_v16i16_1(<8 x i16> %a) { +; CHECK-LABEL: insert_hi128_v16i16_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: xvpermi.q $xr0, 
$xr0, 2 +; CHECK-NEXT: ret +entry: + %1 = call <16 x i16> @llvm.experimental.vector.insert.v16i16.v8i16(<16 x i16> poison, <8 x i16> %a, i64 8) + ret <16 x i16> %1 +} + +define <16 x i16> @insert_lo128_v16i16_2(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: insert_lo128_v16i16_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vori.b $vr0, $vr1, 0 +; CHECK-NEXT: ret +entry: + %1 = call <16 x i16> @llvm.experimental.vector.insert.v16i16.v8i16(<16 x i16> poison, <8 x i16> %b, i64 0) + ret <16 x i16> %1 +} + +define <16 x i16> @insert_hi128_v16i16_2(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: insert_hi128_v16i16_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ret +entry: + %1 = call <16 x i16> @llvm.experimental.vector.insert.v16i16.v8i16(<16 x i16> poison, <8 x i16> %b, i64 8) + ret <16 x i16> %1 +} + +define <16 x i16> @insert_lo128_v16i16_3(<16 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: insert_lo128_v16i16_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48 +; CHECK-NEXT: ret +entry: + %1 = call <16 x i16> @llvm.experimental.vector.insert.v16i16.v8i16(<16 x i16> %a, <8 x i16> %b, i64 0) + ret <16 x i16> %1 +} + +define <16 x i16> @insert_hi128_v16i16_3(<16 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: insert_hi128_v16i16_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ret +entry: + %1 = call <16 x i16> @llvm.experimental.vector.insert.v16i16.v8i16(<16 x i16> %a, <8 x i16> %b, i64 8) + ret <16 x i16> %1 +} + +declare <32 x i8> @llvm.experimental.vector.insert.v32i8.v16i8(<32 x i8>, <16 x i8>, i64) + +define <32 x i8> @insert_lo128_v32i8_1(<16 x i8> %a) { +; CHECK-LABEL: insert_lo128_v32i8_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: ret +entry: + %1 = call <32 x i8> @llvm.experimental.vector.insert.v32i8.v16i8(<32 x i8> poison, <16 x i8> %a, i64 0) + ret <32 x i8> %1 +} + +define <32 x i8> @insert_hi128_v32i8_1(<16 x i8> %a) { +; CHECK-LABEL: insert_hi128_v32i8_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 2 +; CHECK-NEXT: ret +entry: + %1 = call <32 x i8> @llvm.experimental.vector.insert.v32i8.v16i8(<32 x i8> poison, <16 x i8> %a, i64 16) + ret <32 x i8> %1 +} + +define <32 x i8> @insert_lo128_v32i8_2(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: insert_lo128_v32i8_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vori.b $vr0, $vr1, 0 +; CHECK-NEXT: ret +entry: + %1 = call <32 x i8> @llvm.experimental.vector.insert.v32i8.v16i8(<32 x i8> poison, <16 x i8> %b, i64 0) + ret <32 x i8> %1 +} + +define <32 x i8> @insert_hi128_v32i8_2(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: insert_hi128_v32i8_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ret +entry: + %1 = call <32 x i8> @llvm.experimental.vector.insert.v32i8.v16i8(<32 x i8> poison, <16 x i8> %b, i64 16) + ret <32 x i8> %1 +} + +define <32 x i8> @insert_lo128_v32i8_3(<32 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: insert_lo128_v32i8_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48 +; CHECK-NEXT: ret +entry: + %1 = call <32 x i8> @llvm.experimental.vector.insert.v32i8.v16i8(<32 x i8> %a, <16 x i8> %b, i64 0) + ret 
<32 x i8> %1 +} + +define <32 x i8> @insert_hi128_v32i8_3(<32 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: insert_hi128_v32i8_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ret +entry: + %1 = call <32 x i8> @llvm.experimental.vector.insert.v32i8.v16i8(<32 x i8> %a, <16 x i8> %b, i64 16) + ret <32 x i8> %1 +} + +define <4 x i32> @extract_lo128_v8i32_1(<8 x i32> %a) { +; CHECK-LABEL: extract_lo128_v8i32_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <8 x i32> %a, <8 x i32> poison, <4 x i32> + ret <4 x i32> %1 +} + +define <4 x i32> @extract_hi128_v8i32_1(<8 x i32> %a) { +; CHECK-LABEL: extract_hi128_v8i32_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <8 x i32> %a, <8 x i32> poison, <4 x i32> + ret <4 x i32> %1 +} + +define <4 x i32> @extract_lo128_v8i32_2(<8 x i32> %a, <8 x i32> %b) { +; CHECK-LABEL: extract_lo128_v8i32_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvori.b $xr0, $xr1, 0 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <8 x i32> %b, <8 x i32> poison, <4 x i32> + ret <4 x i32> %1 +} + +define <4 x i32> @extract_hi128_v8i32_2(<8 x i32> %a, <8 x i32> %b) { +; CHECK-LABEL: extract_hi128_v8i32_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 1 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <8 x i32> %b, <8 x i32> poison, <4 x i32> + ret <4 x i32> %1 +} + +define <4 x float> @extract_lo128_v8f32_1(<8 x float> %a) { +; CHECK-LABEL: extract_lo128_v8f32_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> + ret <4 x float> %1 +} + +define <4 x float> @extract_hi128_v8f32_1(<8 x float> %a) { +; CHECK-LABEL: extract_hi128_v8f32_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> + ret <4 x float> %1 +} + +define <4 x float> @extract_lo128_v8f32_2(<8 x float> %a, <8 x float> %b) { +; CHECK-LABEL: extract_lo128_v8f32_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvori.b $xr0, $xr1, 0 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <8 x float> %b, <8 x float> poison, <4 x i32> + ret <4 x float> %1 +} + +define <4 x float> @extract_hi128_v8f32_2(<8 x float> %a, <8 x float> %b) { +; CHECK-LABEL: extract_hi128_v8f32_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 1 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <8 x float> %b, <8 x float> poison, <4 x i32> + ret <4 x float> %1 +} + +define <2 x i64> @extract_lo128_v4i64_1(<4 x i64> %a) { +; CHECK-LABEL: extract_lo128_v4i64_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <4 x i64> %a, <4 x i64> poison, <2 x i32> + ret <2 x i64> %1 +} + +define <2 x i64> @extract_hi128_v4i64_1(<4 x i64> %a) { +; CHECK-LABEL: extract_hi128_v4i64_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1 +; CHECK-NEXT: 
# kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <4 x i64> %a, <4 x i64> poison, <2 x i32> + ret <2 x i64> %1 +} + +define <2 x i64> @extract_lo128_v4i64_2(<4 x i64> %a, <4 x i64> %b) { +; CHECK-LABEL: extract_lo128_v4i64_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvori.b $xr0, $xr1, 0 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <4 x i64> %b, <4 x i64> poison, <2 x i32> + ret <2 x i64> %1 +} + +define <2 x i64> @extract_hi128_v4i64_2(<4 x i64> %a, <4 x i64> %b) { +; CHECK-LABEL: extract_hi128_v4i64_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 1 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <4 x i64> %b, <4 x i64> poison, <2 x i32> + ret <2 x i64> %1 +} + +define <2 x double> @extract_lo128_v4f64_a(<4 x double> %a) { +; CHECK-LABEL: extract_lo128_v4f64_a: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> + ret <2 x double> %1 +} + +define <2 x double> @extract_hi128_v4f64_1(<4 x double> %a) { +; CHECK-LABEL: extract_hi128_v4f64_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> + ret <2 x double> %1 +} + +define <2 x double> @extract_lo128_v4f64_2(<4 x double> %a, <4 x double> %b) { +; CHECK-LABEL: extract_lo128_v4f64_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvori.b $xr0, $xr1, 0 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> + ret <2 x double> %1 +} + +define <2 x double> @extract_hi128_v4f64_2(<4 x double> %a, <4 x double> %b) { +; CHECK-LABEL: extract_hi128_v4f64_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 1 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> + ret <2 x double> %1 +} + +define <8 x i16> @extract_lo128_v16i16_1(<16 x i16> %a) { +; CHECK-LABEL: extract_lo128_v16i16_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <16 x i16> %a, <16 x i16> poison, + <8 x i32> + ret <8 x i16> %1 +} + +define <8 x i16> @extract_hi128_v16i16_1(<16 x i16> %a) { +; CHECK-LABEL: extract_hi128_v16i16_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <16 x i16> %a, <16 x i16> poison, + <8 x i32> + ret <8 x i16> %1 +} + +define <8 x i16> @extract_lo128_v16i16_2(<16 x i16> %a, <16 x i16> %b) { +; CHECK-LABEL: extract_lo128_v16i16_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvori.b $xr0, $xr1, 0 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <16 x i16> %b, <16 x i16> poison, + <8 x i32> + ret <8 x i16> %1 +} + +define <8 x i16> @extract_hi128_v16i16_2(<16 x i16> %a, <16 x i16> %b) { +; CHECK-LABEL: extract_hi128_v16i16_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 1 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <16 x i16> %b, <16 x i16> poison, + <8 x 
i32> + ret <8 x i16> %1 +} + +define <16 x i8> @extract_lo128_v32i8_1(<32 x i8> %a) { +; CHECK-LABEL: extract_lo128_v32i8_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <32 x i8> %a, <32 x i8> poison, + <16 x i32> + ret <16 x i8> %1 +} + +define <16 x i8> @extract_hi128_v32i8_1(<32 x i8> %a) { +; CHECK-LABEL: extract_hi128_v32i8_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <32 x i8> %a, <32 x i8> poison, + <16 x i32> + ret <16 x i8> %1 +} + +define <16 x i8> @extract_lo128_v32i8_2(<32 x i8> %a, <32 x i8> %b) { +; CHECK-LABEL: extract_lo128_v32i8_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvori.b $xr0, $xr1, 0 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <32 x i8> %b, <32 x i8> poison, + <16 x i32> + ret <16 x i8> %1 +} + +define <16 x i8> @extract_hi128_v32i8_2(<32 x i8> %a, <32 x i8> %b) { +; CHECK-LABEL: extract_hi128_v32i8_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 1 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <32 x i8> %b, <32 x i8> poison, + <16 x i32> + ret <16 x i8> %1 +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/issue107355.ll b/llvm/test/CodeGen/LoongArch/lasx/issue107355.ll index 818bd4311615d..506b5c1232f25 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/issue107355.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/issue107355.ll @@ -18,10 +18,10 @@ define void @foo() { ; CHECK-NEXT: ld.d $a3, $a3, %got_pc_lo12(g_813) ; CHECK-NEXT: st.w $zero, $a1, 0 ; CHECK-NEXT: st.w $a2, $a3, 0 +; CHECK-NEXT: xvrepli.b $xr0, 0 +; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: vrepli.b $vr0, 0 ; CHECK-NEXT: vst $vr0, $a0, 32 -; CHECK-NEXT: xvpermi.q $xr0, $xr0, 2 -; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: st.w $zero, $a0, 20 ; CHECK-NEXT: ret entry: