diff --git a/llvm/test/CodeGen/LoongArch/lasx/concat-vectors.ll b/llvm/test/CodeGen/LoongArch/lasx/concat-vectors.ll
new file mode 100644
index 0000000000000..d9df27b43e69c
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/concat-vectors.ll
@@ -0,0 +1,230 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+define <32 x i8> @concat_poison_v32i8_1(<16 x i8> %a) {
+; CHECK-LABEL: concat_poison_v32i8_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr0, 2
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <16 x i8> %a, <16 x i8> poison,
+    <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
+                i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
+                i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
+                i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  ret <32 x i8> %1
+}
+
+define <32 x i8> @concat_poison_v32i8_2(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: concat_poison_v32i8_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vori.b $vr0, $vr1, 0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr0, 2
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <16 x i8> %b, <16 x i8> poison,
+    <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
+                i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
+                i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
+                i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  ret <32 x i8> %1
+}
+
+define <32 x i8> @concat_vectors_v32i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: concat_vectors_v32i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <16 x i8> %a, <16 x i8> %b,
+    <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
+                i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
+                i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
+                i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  ret <32 x i8> %1
+}
+
+define <16 x i16> @concat_poison_v16i16_1(<8 x i16> %a) {
+; CHECK-LABEL: concat_poison_v16i16_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr0, 2
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <8 x i16> %a, <8 x i16> poison,
+    <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
+                i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @concat_poison_v16i16_2(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: concat_poison_v16i16_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vori.b $vr0, $vr1, 0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr0, 2
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <8 x i16> %b, <8 x i16> poison,
+    <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
+                i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @concat_vectors_v16i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: concat_vectors_v16i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <8 x i16> %a, <8 x i16> %b,
+    <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
+                i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x i16> %1
+}
+
+define <8 x i32> @concat_poison_v8i32_1(<4 x i32> %a) {
+; CHECK-LABEL: concat_poison_v8i32_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr0, 2
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <4 x i32> %a, <4 x i32> poison,
+    <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i32> %1
+}
+
+define <8 x i32> @concat_poison_v8i32_2(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: concat_poison_v8i32_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vori.b $vr0, $vr1, 0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr0, 2
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <4 x i32> %b, <4 x i32> poison,
+    <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i32> %1
+}
+
+define <8 x i32> @concat_vectors_v8i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: concat_vectors_v8i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <4 x i32> %a, <4 x i32> %b,
+    <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i32> %1
+}
+
+define <8 x float> @concat_poison_v8f32_1(<4 x float> %a) {
+; CHECK-LABEL: concat_poison_v8f32_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr0, 2
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <4 x float> %a, <4 x float> poison,
+    <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x float> %1
+}
+
+define <8 x float> @concat_poison_v8f32_2(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: concat_poison_v8f32_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vori.b $vr0, $vr1, 0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr0, 2
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <4 x float> %b, <4 x float> poison,
+    <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x float> %1
+}
+
+define <8 x float> @concat_vectors_v8f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: concat_vectors_v8f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <4 x float> %a, <4 x float> %b,
+    <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x float> %1
+}
+
+define <4 x i64> @concat_poison_v8i64_1(<2 x i64> %a) {
+; CHECK-LABEL: concat_poison_v8i64_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr0, 2
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <2 x i64> %a, <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i64> %1
+}
+
+define <4 x i64> @concat_poison_v8i64_2(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: concat_poison_v8i64_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vori.b $vr0, $vr1, 0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr0, 2
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <2 x i64> %b, <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i64> %1
+}
+
+define <4 x i64> @concat_vectors_v8i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: concat_vectors_v8i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i64> %1
+}
+
+define <4 x double> @concat_poison_v8f64_1(<2 x double> %a) {
+; CHECK-LABEL: concat_poison_v8f64_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr0, 2
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <2 x double> %a, <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x double> %1
+}
+
+define <4 x double> @concat_poison_v8f64_2(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: concat_poison_v8f64_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vori.b $vr0, $vr1, 0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr0, 2
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <2 x double> %b, <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x double> %1
+}
+
+define <4 x double> @concat_vectors_v8f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: concat_vectors_v8f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT:    # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <2 x double> %a, <2 x double> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x double> %1
+}
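The concat-vectors.ll tests above all lower to a single xvpermi.q plus register-copy bookkeeping, since a 256-bit LASX register is two 128-bit LSX lanes. As a minimal illustration of the pattern being pinned down, the sketch below (hypothetical file and function name, not part of this patch) concatenates two 128-bit halves the same way and can be fed to the same llc invocation the tests use:

```llvm
; concat-repro.ll (illustrative only):
;   llc --mtriple=loongarch64 --mattr=+lasx concat-repro.ll -o -
define <4 x i64> @concat_halves(<2 x i64> %lo, <2 x i64> %hi) {
  ; Mask indices 0-1 pick lanes from %lo, indices 2-3 pick lanes from %hi,
  ; so the shuffle expresses a plain 128-bit concatenation.
  %r = shufflevector <2 x i64> %lo, <2 x i64> %hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i64> %r
}
```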
diff --git a/llvm/test/CodeGen/LoongArch/lasx/insert-extract-subvector.ll b/llvm/test/CodeGen/LoongArch/lasx/insert-extract-subvector.ll
new file mode 100644
index 0000000000000..be7e38d602b29
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/insert-extract-subvector.ll
@@ -0,0 +1,1478 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32>, <4 x i32>, i64)
+
+define <8 x i32> @insert_lo128_v8i32_1(<4 x i32> %a) {
+; CHECK-LABEL: insert_lo128_v8i32_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    vst $vr0, $sp, 0
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> %a, i64 0)
+  ret <8 x i32> %1
+}
+
+define <8 x i32> @insert_hi128_v8i32_1(<4 x i32> %a) {
+; CHECK-LABEL: insert_hi128_v8i32_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    vst $vr0, $sp, 16
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> %a, i64 4)
+  ret <8 x i32> %1
+}
+
+define <8 x i32> @insert_lo128_v8i32_2(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: insert_lo128_v8i32_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    vst $vr1, $sp, 0
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> %b, i64 0)
+  ret <8 x i32> %1
+}
+
+define <8 x i32> @insert_hi128_v8i32_2(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: insert_hi128_v8i32_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    vst $vr1, $sp, 16
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> %b, i64 4)
+  ret <8 x i32> %1
+}
+
+define <8 x i32> @insert_lo128_v8i32_3(<8 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: insert_lo128_v8i32_3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    vst $vr1, $sp, 0
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %a, <4 x i32> %b, i64 0)
+  ret <8 x i32> %1
+}
+
+define <8 x i32> @insert_hi128_v8i32_3(<8 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: insert_hi128_v8i32_3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    vst $vr1, $sp, 16
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %a, <4 x i32> %b, i64 4)
+  ret <8 x i32> %1
+}
+
+declare <8 x float> @llvm.experimental.vector.insert.v8f32.v4f32(<8 x float>, <4 x float>, i64)
+
+define <8 x float> @insert_lo128_v8f32_1(<4 x float> %a) {
+; CHECK-LABEL: insert_lo128_v8f32_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    vst $vr0, $sp, 0
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <8 x float> @llvm.experimental.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> %a, i64 0)
+  ret <8 x float> %1
+}
+
+define <8 x float> @insert_hi128_v8f32_1(<4 x float> %a) {
+; CHECK-LABEL: insert_hi128_v8f32_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    vst $vr0, $sp, 16
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <8 x float> @llvm.experimental.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> %a, i64 4)
+  ret <8 x float> %1
+}
+
+define <8 x float> @insert_lo128_v8f32_2(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: insert_lo128_v8f32_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    vst $vr1, $sp, 0
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <8 x float> @llvm.experimental.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> %b, i64 0)
+  ret <8 x float> %1
+}
+
+define <8 x float> @insert_hi128_v8f32_2(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: insert_hi128_v8f32_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    vst $vr1, $sp, 16
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <8 x float> @llvm.experimental.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> %b, i64 4)
+  ret <8 x float> %1
+}
+
+define <8 x float> @insert_lo128_v8f32_3(<8 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: insert_lo128_v8f32_3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    vst $vr1, $sp, 0
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <8 x float> @llvm.experimental.vector.insert.v8f32.v4f32(<8 x float> %a, <4 x float> %b, i64 0)
+  ret <8 x float> %1
+}
+
+define <8 x float> @insert_hi128_v8f32_3(<8 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: insert_hi128_v8f32_3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    vst $vr1, $sp, 16
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <8 x float> @llvm.experimental.vector.insert.v8f32.v4f32(<8 x float> %a, <4 x float> %b, i64 4)
+  ret <8 x float> %1
+}
+
+declare <4 x i64> @llvm.experimental.vector.insert.v4i64.v2i64(<4 x i64>, <2 x i64>, i64)
+
+define <4 x i64> @insert_lo128_v4i64_1(<2 x i64> %a) {
+; CHECK-LABEL: insert_lo128_v4i64_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    vst $vr0, $sp, 0
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <4 x i64> @llvm.experimental.vector.insert.v4i64.v2i64(<4 x i64> poison, <2 x i64> %a, i64 0)
+  ret <4 x i64> %1
+}
+
+define <4 x i64> @insert_hi128_v4i64_1(<2 x i64> %a) {
+; CHECK-LABEL: insert_hi128_v4i64_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    vst $vr0, $sp, 16
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <4 x i64> @llvm.experimental.vector.insert.v4i64.v2i64(<4 x i64> poison, <2 x i64> %a, i64 2)
+  ret <4 x i64> %1
+}
+
+define <4 x i64> @insert_lo128_v4i64_2(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: insert_lo128_v4i64_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    vst $vr1, $sp, 0
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <4 x i64> @llvm.experimental.vector.insert.v4i64.v2i64(<4 x i64> poison, <2 x i64> %b, i64 0)
+  ret <4 x i64> %1
+}
+
+define <4 x i64> @insert_hi128_v4i64_2(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: insert_hi128_v4i64_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    vst $vr1, $sp, 16
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <4 x i64> @llvm.experimental.vector.insert.v4i64.v2i64(<4 x i64> poison, <2 x i64> %b, i64 2)
+  ret <4 x i64> %1
+}
+
+define <4 x i64> @insert_lo128_v4i64_3(<4 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: insert_lo128_v4i64_3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    vst $vr1, $sp, 0
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <4 x i64> @llvm.experimental.vector.insert.v4i64.v2i64(<4 x i64> %a, <2 x i64> %b, i64 0)
+  ret <4 x i64> %1
+}
+
+define <4 x i64> @insert_hi128_v4i64_3(<4 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: insert_hi128_v4i64_3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    vst $vr1, $sp, 16
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <4 x i64> @llvm.experimental.vector.insert.v4i64.v2i64(<4 x i64> %a, <2 x i64> %b, i64 2)
+  ret <4 x i64> %1
+}
+
+declare <4 x double> @llvm.experimental.vector.insert.v4f64.v2f64(<4 x double>, <2 x double>, i64)
+
+define <4 x double> @insert_lo128_v4f64_1(<2 x double> %a) {
+; CHECK-LABEL: insert_lo128_v4f64_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    vst $vr0, $sp, 0
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <4 x double> @llvm.experimental.vector.insert.v4f64.v2f64(<4 x double> poison, <2 x double> %a, i64 0)
+  ret <4 x double> %1
+}
+
+define <4 x double> @insert_hi128_v4f64_1(<2 x double> %a) {
+; CHECK-LABEL: insert_hi128_v4f64_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    vst $vr0, $sp, 16
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <4 x double> @llvm.experimental.vector.insert.v4f64.v2f64(<4 x double> poison, <2 x double> %a, i64 2)
+  ret <4 x double> %1
+}
+
+define <4 x double> @insert_lo128_v4f64_2(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: insert_lo128_v4f64_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    vst $vr1, $sp, 0
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <4 x double> @llvm.experimental.vector.insert.v4f64.v2f64(<4 x double> poison, <2 x double> %b, i64 0)
+  ret <4 x double> %1
+}
+
+define <4 x double> @insert_hi128_v4f64_2(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: insert_hi128_v4f64_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    vst $vr1, $sp, 16
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <4 x double> @llvm.experimental.vector.insert.v4f64.v2f64(<4 x double> poison, <2 x double> %b, i64 2)
+  ret <4 x double> %1
+}
+
+define <4 x double> @insert_lo128_v4f64_3(<4 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: insert_lo128_v4f64_3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    vst $vr1, $sp, 0
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <4 x double> @llvm.experimental.vector.insert.v4f64.v2f64(<4 x double> %a, <2 x double> %b, i64 0)
+  ret <4 x double> %1
+}
+
+define <4 x double> @insert_hi128_v4f64_3(<4 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: insert_hi128_v4f64_3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    vst $vr1, $sp, 16
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <4 x double> @llvm.experimental.vector.insert.v4f64.v2f64(<4 x double> %a, <2 x double> %b, i64 2)
+  ret <4 x double> %1
+}
+
+declare <16 x i16> @llvm.experimental.vector.insert.v16i16.v8i16(<16 x i16>, <8 x i16>, i64)
+
+define <16 x i16> @insert_lo128_v16i16_1(<8 x i16> %a) {
+; CHECK-LABEL: insert_lo128_v16i16_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    vst $vr0, $sp, 0
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <16 x i16> @llvm.experimental.vector.insert.v16i16.v8i16(<16 x i16> poison, <8 x i16> %a, i64 0)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @insert_hi128_v16i16_1(<8 x i16> %a) {
+; CHECK-LABEL: insert_hi128_v16i16_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    vst $vr0, $sp, 16
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <16 x i16> @llvm.experimental.vector.insert.v16i16.v8i16(<16 x i16> poison, <8 x i16> %a, i64 8)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @insert_lo128_v16i16_2(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: insert_lo128_v16i16_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    vst $vr1, $sp, 0
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <16 x i16> @llvm.experimental.vector.insert.v16i16.v8i16(<16 x i16> poison, <8 x i16> %b, i64 0)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @insert_hi128_v16i16_2(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: insert_hi128_v16i16_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    vst $vr1, $sp, 16
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <16 x i16> @llvm.experimental.vector.insert.v16i16.v8i16(<16 x i16> poison, <8 x i16> %b, i64 8)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @insert_lo128_v16i16_3(<16 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: insert_lo128_v16i16_3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    vst $vr1, $sp, 0
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <16 x i16> @llvm.experimental.vector.insert.v16i16.v8i16(<16 x i16> %a, <8 x i16> %b, i64 0)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @insert_hi128_v16i16_3(<16 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: insert_hi128_v16i16_3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    vst $vr1, $sp, 16
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <16 x i16> @llvm.experimental.vector.insert.v16i16.v8i16(<16 x i16> %a, <8 x i16> %b, i64 8)
+  ret <16 x i16> %1
+}
+
+declare <32 x i8> @llvm.experimental.vector.insert.v32i8.v16i8(<32 x i8>, <16 x i8>, i64)
+
+define <32 x i8> @insert_lo128_v32i8_1(<16 x i8> %a) {
+; CHECK-LABEL: insert_lo128_v32i8_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    vst $vr0, $sp, 0
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <32 x i8> @llvm.experimental.vector.insert.v32i8.v16i8(<32 x i8> poison, <16 x i8> %a, i64 0)
+  ret <32 x i8> %1
+}
+
+define <32 x i8> @insert_hi128_v32i8_1(<16 x i8> %a) {
+; CHECK-LABEL: insert_hi128_v32i8_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    vst $vr0, $sp, 16
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <32 x i8> @llvm.experimental.vector.insert.v32i8.v16i8(<32 x i8> poison, <16 x i8> %a, i64 16)
+  ret <32 x i8> %1
+}
+
+define <32 x i8> @insert_lo128_v32i8_2(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: insert_lo128_v32i8_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    vst $vr1, $sp, 0
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <32 x i8> @llvm.experimental.vector.insert.v32i8.v16i8(<32 x i8> poison, <16 x i8> %b, i64 0)
+  ret <32 x i8> %1
+}
+
+define <32 x i8> @insert_hi128_v32i8_2(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: insert_hi128_v32i8_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    vst $vr1, $sp, 16
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <32 x i8> @llvm.experimental.vector.insert.v32i8.v16i8(<32 x i8> poison, <16 x i8> %b, i64 16)
+  ret <32 x i8> %1
+}
+
+define <32 x i8> @insert_lo128_v32i8_3(<32 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: insert_lo128_v32i8_3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    vst $vr1, $sp, 0
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <32 x i8> @llvm.experimental.vector.insert.v32i8.v16i8(<32 x i8> %a, <16 x i8> %b, i64 0)
+  ret <32 x i8> %1
+}
+
+define <32 x i8> @insert_hi128_v32i8_3(<32 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: insert_hi128_v32i8_3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    vst $vr1, $sp, 16
+; CHECK-NEXT:    xvld $xr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <32 x i8> @llvm.experimental.vector.insert.v32i8.v16i8(<32 x i8> %a, <16 x i8> %b, i64 16)
+  ret <32 x i8> %1
+}
+
+define <4 x i32> @extract_lo128_v8i32_1(<8 x i32> %a) {
+; CHECK-LABEL: extract_lo128_v8i32_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    vld $vr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <8 x i32> %a, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @extract_hi128_v8i32_1(<8 x i32> %a) {
+; CHECK-LABEL: extract_hi128_v8i32_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    vld $vr0, $sp, 16
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <8 x i32> %a, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @extract_lo128_v8i32_2(<8 x i32> %a, <8 x i32> %b) {
+; CHECK-LABEL: extract_lo128_v8i32_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr1, $sp, 0
+; CHECK-NEXT:    vld $vr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <8 x i32> %b, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @extract_hi128_v8i32_2(<8 x i32> %a, <8 x i32> %b) {
+; CHECK-LABEL: extract_hi128_v8i32_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr1, $sp, 0
+; CHECK-NEXT:    vld $vr0, $sp, 16
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <8 x i32> %b, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  ret <4 x i32> %1
+}
+
+define <4 x float> @extract_lo128_v8f32_1(<8 x float> %a) {
+; CHECK-LABEL: extract_lo128_v8f32_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    vld $vr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x float> %1
+}
+
+define <4 x float> @extract_hi128_v8f32_1(<8 x float> %a) {
+; CHECK-LABEL: extract_hi128_v8f32_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    vld $vr0, $sp, 16
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  ret <4 x float> %1
+}
+
+define <4 x float> @extract_lo128_v8f32_2(<8 x float> %a, <8 x float> %b) {
+; CHECK-LABEL: extract_lo128_v8f32_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr1, $sp, 0
+; CHECK-NEXT:    vld $vr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <8 x float> %b, <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x float> %1
+}
+
+define <4 x float> @extract_hi128_v8f32_2(<8 x float> %a, <8 x float> %b) {
+; CHECK-LABEL: extract_hi128_v8f32_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr1, $sp, 0
+; CHECK-NEXT:    vld $vr0, $sp, 16
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <8 x float> %b, <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  ret <4 x float> %1
+}
+
+define <2 x i64> @extract_lo128_v4i64_1(<4 x i64> %a) {
+; CHECK-LABEL: extract_lo128_v4i64_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    vld $vr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <4 x i64> %a, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @extract_hi128_v4i64_1(<4 x i64> %a) {
+; CHECK-LABEL: extract_hi128_v4i64_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    vld $vr0, $sp, 16
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <4 x i64> %a, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @extract_lo128_v4i64_2(<4 x i64> %a, <4 x i64> %b) {
+; CHECK-LABEL: extract_lo128_v4i64_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr1, $sp, 0
+; CHECK-NEXT:    vld $vr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <4 x i64> %b, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @extract_hi128_v4i64_2(<4 x i64> %a, <4 x i64> %b) {
+; CHECK-LABEL: extract_hi128_v4i64_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr1, $sp, 0
+; CHECK-NEXT:    vld $vr0, $sp, 16
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <4 x i64> %b, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
+  ret <2 x i64> %1
+}
+
+define <2 x double> @extract_lo128_v4f64_a(<4 x double> %a) {
+; CHECK-LABEL: extract_lo128_v4f64_a:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    vld $vr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 0, i32 1>
+  ret <2 x double> %1
+}
+
+define <2 x double> @extract_hi128_v4f64_1(<4 x double> %a) {
+; CHECK-LABEL: extract_hi128_v4f64_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    vld $vr0, $sp, 16
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 2, i32 3>
+  ret <2 x double> %1
+}
+
+define <2 x double> @extract_lo128_v4f64_2(<4 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: extract_lo128_v4f64_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr1, $sp, 0
+; CHECK-NEXT:    vld $vr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 0, i32 1>
+  ret <2 x double> %1
+}
+
+define <2 x double> @extract_hi128_v4f64_2(<4 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: extract_hi128_v4f64_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr1, $sp, 0
+; CHECK-NEXT:    vld $vr0, $sp, 16
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 2, i32 3>
+  ret <2 x double> %1
+}
+
+define <8 x i16> @extract_lo128_v16i16_1(<16 x i16> %a) {
+; CHECK-LABEL: extract_lo128_v16i16_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    vld $vr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <16 x i16> %a, <16 x i16> poison,
+    <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @extract_hi128_v16i16_1(<16 x i16> %a) {
+; CHECK-LABEL: extract_hi128_v16i16_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    vld $vr0, $sp, 16
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <16 x i16> %a, <16 x i16> poison,
+    <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @extract_lo128_v16i16_2(<16 x i16> %a, <16 x i16> %b) {
+; CHECK-LABEL: extract_lo128_v16i16_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr1, $sp, 0
+; CHECK-NEXT:    vld $vr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <16 x i16> %b, <16 x i16> poison,
+    <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @extract_hi128_v16i16_2(<16 x i16> %a, <16 x i16> %b) {
+; CHECK-LABEL: extract_hi128_v16i16_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr1, $sp, 0
+; CHECK-NEXT:    vld $vr0, $sp, 16
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <16 x i16> %b, <16 x i16> poison,
+    <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <8 x i16> %1
+}
+
+define <16 x i8> @extract_lo128_v32i8_1(<32 x i8> %a) {
+; CHECK-LABEL: extract_lo128_v32i8_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    vld $vr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <32 x i8> %a, <32 x i8> poison,
+    <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
+                i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x i8> %1
+}
+
+define <16 x i8> @extract_hi128_v32i8_1(<32 x i8> %a) {
+; CHECK-LABEL: extract_hi128_v32i8_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    vld $vr0, $sp, 16
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <32 x i8> %a, <32 x i8> poison,
+    <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
+                i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  ret <16 x i8> %1
+}
+
+define <16 x i8> @extract_lo128_v32i8_2(<32 x i8> %a, <32 x i8> %b) {
+; CHECK-LABEL: extract_lo128_v32i8_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr1, $sp, 0
+; CHECK-NEXT:    vld $vr0, $sp, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <32 x i8> %b, <32 x i8> poison,
+    <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
+                i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x i8> %1
+}
+
+define <16 x i8> @extract_hi128_v32i8_2(<32 x i8> %a, <32 x i8> %b) {
+; CHECK-LABEL: extract_hi128_v32i8_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvst $xr1, $sp, 0
+; CHECK-NEXT:    vld $vr0, $sp, 16
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = shufflevector <32 x i8> %b, <32 x i8> poison,
+    <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
+                i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  ret <16 x i8> %1
+}
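Every insert/extract test above currently lowers through memory: the prologue aligns the stack to 32 bytes (bstrins.d $sp, $zero, 4, 0 clears the low five bits of $sp), the 128-bit subvector is stored with vst at offset 0 or 16, and the full 256-bit vector moves through the slot with xvst/xvld. The extract tests express the operation as a shufflevector; for reference, a sketch of the equivalent intrinsic form, assuming the extract counterpart of the llvm.experimental.vector.insert intrinsic used above (the sketch is illustrative and not part of this patch):

```llvm
declare <2 x i64> @llvm.experimental.vector.extract.v2i64.v4i64(<4 x i64>, i64)

define <2 x i64> @extract_hi128(<4 x i64> %v) {
  ; The index i64 2 selects the subvector starting at element 2, i.e. the
  ; high 128 bits -- the same result as the
  ; shufflevector ... <2 x i32> <i32 2, i32 3> form used in the tests above.
  %r = call <2 x i64> @llvm.experimental.vector.extract.v2i64.v4i64(<4 x i64> %v, i64 2)
  ret <2 x i64> %r
}
```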