@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define <2 x i16> @vwmulsu_v2i16(<2 x i8>* %x, <2 x i8>* %y) {
 ; CHECK-LABEL: vwmulsu_v2i16:
@@ -681,3 +681,247 @@ define <16 x i64> @vwmulsu_vx_v16i64(<16 x i32>* %x, i32 %y) {
   %f = mul <16 x i64> %d, %e
   ret <16 x i64> %f
 }
+
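+; Splat of a zero-extended i8 scalar times a sign-extended vector should fold to vwmulsu.vx.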
+define <8 x i16> @vwmulsu_vx_v8i16_i8(<8 x i8>* %x, i8* %y) {
+; CHECK-LABEL: vwmulsu_vx_v8i16_i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vle8.v v9, (a0)
+; CHECK-NEXT:    lbu a0, 0(a1)
+; CHECK-NEXT:    vwmulsu.vx v8, v9, a0
+; CHECK-NEXT:    ret
+  %a = load <8 x i8>, <8 x i8>* %x
+  %b = load i8, i8* %y
+  %c = zext i8 %b to i16
+  %d = insertelement <8 x i16> poison, i16 %c, i32 0
+  %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
+  %f = sext <8 x i8> %a to <8 x i16>
+  %g = mul <8 x i16> %e, %f
+  ret <8 x i16> %g
+}
+
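+; With the extensions swapped (sign-extended scalar, zero-extended vector) the fold
+; does not apply; a vzext.vf2 plus vmul.vx is expected instead.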
+define <8 x i16> @vwmulsu_vx_v8i16_i8_swap(<8 x i8>* %x, i8* %y) {
+; CHECK-LABEL: vwmulsu_vx_v8i16_i8_swap:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    lb a0, 0(a1)
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vzext.vf2 v9, v8
+; CHECK-NEXT:    vmul.vx v8, v9, a0
+; CHECK-NEXT:    ret
+  %a = load <8 x i8>, <8 x i8>* %x
+  %b = load i8, i8* %y
+  %c = sext i8 %b to i16
+  %d = insertelement <8 x i16> poison, i16 %c, i32 0
+  %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
+  %f = zext <8 x i8> %a to <8 x i16>
+  %g = mul <8 x i16> %e, %f
+  ret <8 x i16> %g
+}
+
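+; A zero-extended i8 is also non-negative as an i16, so the all-signed vwmul.vx suffices here.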
+define <4 x i32> @vwmulsu_vx_v4i32_i8(<4 x i16>* %x, i8* %y) {
+; CHECK-LABEL: vwmulsu_vx_v4i32_i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    lbu a0, 0(a1)
+; CHECK-NEXT:    vwmul.vx v8, v9, a0
+; CHECK-NEXT:    ret
+  %a = load <4 x i16>, <4 x i16>* %x
+  %b = load i8, i8* %y
+  %c = zext i8 %b to i32
+  %d = insertelement <4 x i32> poison, i32 %c, i32 0
+  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
+  %f = sext <4 x i16> %a to <4 x i32>
+  %g = mul <4 x i32> %e, %f
+  ret <4 x i32> %g
+}
+
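+; Zero-extended i16 scalar times sign-extended i16 vector folds to vwmulsu.vx.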
+define <4 x i32> @vwmulsu_vx_v4i32_i16(<4 x i16>* %x, i16* %y) {
+; CHECK-LABEL: vwmulsu_vx_v4i32_i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    lhu a0, 0(a1)
+; CHECK-NEXT:    vwmulsu.vx v8, v9, a0
+; CHECK-NEXT:    ret
+  %a = load <4 x i16>, <4 x i16>* %x
+  %b = load i16, i16* %y
+  %c = zext i16 %b to i32
+  %d = insertelement <4 x i32> poison, i32 %c, i32 0
+  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
+  %f = sext <4 x i16> %a to <4 x i32>
+  %g = mul <4 x i32> %e, %f
+  ret <4 x i32> %g
+}
+
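+; RV32 has no 64-bit GPRs, so the i64 splat is built on the stack and no widening
+; multiply is formed; RV64 can use vwmul.vx since the zero-extended i8 is
+; non-negative as an i32.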
+define <2 x i64> @vwmulsu_vx_v2i64_i8(<2 x i32>* %x, i8* %y) {
+; RV32-LABEL: vwmulsu_vx_v2i64_i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT:    lbu a1, 0(a1)
+; RV32-NEXT:    vle32.v v8, (a0)
+; RV32-NEXT:    sw zero, 12(sp)
+; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vlse64.v v9, (a0), zero
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT:    vsext.vf2 v10, v8
+; RV32-NEXT:    vmul.vv v8, v9, v10
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vwmulsu_vx_v2i64_i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
+; RV64-NEXT:    vle32.v v9, (a0)
+; RV64-NEXT:    lbu a0, 0(a1)
+; RV64-NEXT:    vwmul.vx v8, v9, a0
+; RV64-NEXT:    ret
+  %a = load <2 x i32>, <2 x i32>* %x
+  %b = load i8, i8* %y
+  %c = zext i8 %b to i64
+  %d = insertelement <2 x i64> poison, i64 %c, i64 0
+  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
+  %f = sext <2 x i32> %a to <2 x i64>
+  %g = mul <2 x i64> %e, %f
+  ret <2 x i64> %g
+}
+
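+; Same as above with an i16 scalar: splat via the stack on RV32, vwmul.vx on RV64.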
+define <2 x i64> @vwmulsu_vx_v2i64_i16(<2 x i32>* %x, i16* %y) {
+; RV32-LABEL: vwmulsu_vx_v2i64_i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT:    lhu a1, 0(a1)
+; RV32-NEXT:    vle32.v v8, (a0)
+; RV32-NEXT:    sw zero, 12(sp)
+; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vlse64.v v9, (a0), zero
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT:    vsext.vf2 v10, v8
+; RV32-NEXT:    vmul.vv v8, v9, v10
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vwmulsu_vx_v2i64_i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
+; RV64-NEXT:    vle32.v v9, (a0)
+; RV64-NEXT:    lhu a0, 0(a1)
+; RV64-NEXT:    vwmul.vx v8, v9, a0
+; RV64-NEXT:    ret
+  %a = load <2 x i32>, <2 x i32>* %x
+  %b = load i16, i16* %y
+  %c = zext i16 %b to i64
+  %d = insertelement <2 x i64> poison, i64 %c, i64 0
+  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
+  %f = sext <2 x i32> %a to <2 x i64>
+  %g = mul <2 x i64> %e, %f
+  ret <2 x i64> %g
+}
+
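+; A zero-extended i32 may not fit in a signed i32, so RV64 uses vwmulsu.vx (with lwu);
+; RV32 still splats through the stack.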
+define <2 x i64> @vwmulsu_vx_v2i64_i32(<2 x i32>* %x, i32* %y) {
+; RV32-LABEL: vwmulsu_vx_v2i64_i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT:    lw a1, 0(a1)
+; RV32-NEXT:    vle32.v v8, (a0)
+; RV32-NEXT:    sw zero, 12(sp)
+; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vlse64.v v9, (a0), zero
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT:    vsext.vf2 v10, v8
+; RV32-NEXT:    vmul.vv v8, v9, v10
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vwmulsu_vx_v2i64_i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
+; RV64-NEXT:    vle32.v v9, (a0)
+; RV64-NEXT:    lwu a0, 0(a1)
+; RV64-NEXT:    vwmulsu.vx v8, v9, a0
+; RV64-NEXT:    ret
+  %a = load <2 x i32>, <2 x i32>* %x
+  %b = load i32, i32* %y
+  %c = zext i32 %b to i64
+  %d = insertelement <2 x i64> poison, i64 %c, i64 0
+  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
+  %f = sext <2 x i32> %a to <2 x i64>
+  %g = mul <2 x i64> %e, %f
+  ret <2 x i64> %g
+}
+
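+; An AND with 255 marks the splat as zero-extended from i8; vwmulsu.vx reads only
+; the low 8 bits of the scalar register, so the mask itself can be dropped.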
+define <8 x i16> @vwmulsu_vx_v8i16_i8_and(<8 x i8>* %x, i16 %y) {
+; CHECK-LABEL: vwmulsu_vx_v8i16_i8_and:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vle8.v v9, (a0)
+; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
+; CHECK-NEXT:    ret
+  %a = load <8 x i8>, <8 x i8>* %x
+  %b = and i16 %y, 255
+  %c = insertelement <8 x i16> poison, i16 %b, i32 0
+  %d = shufflevector <8 x i16> %c, <8 x i16> poison, <8 x i32> zeroinitializer
+  %e = sext <8 x i8> %a to <8 x i16>
+  %f = mul <8 x i16> %d, %e
+  ret <8 x i16> %f
+}
+
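+; Mask 254 must actually clear bit 0, so the andi is kept ahead of vwmulsu.vx.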
+define <8 x i16> @vwmulsu_vx_v8i16_i8_and1(<8 x i8>* %x, i16 %y) {
+; CHECK-LABEL: vwmulsu_vx_v8i16_i8_and1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vle8.v v9, (a0)
+; CHECK-NEXT:    andi a0, a1, 254
+; CHECK-NEXT:    vwmulsu.vx v8, v9, a0
+; CHECK-NEXT:    ret
+  %a = load <8 x i8>, <8 x i8>* %x
+  %b = and i16 %y, 254
+  %c = insertelement <8 x i16> poison, i16 %b, i32 0
+  %d = shufflevector <8 x i16> %c, <8 x i16> poison, <8 x i32> zeroinitializer
+  %e = sext <8 x i8> %a to <8 x i16>
+  %f = mul <8 x i16> %d, %e
+  ret <8 x i16> %f
+}
+
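+; Same pattern at i32: an AND with 65535 proves zero-extension from i16,
+; matching vwmulsu.vx with no extra masking.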
+define <4 x i32> @vwmulsu_vx_v4i32_i16_and(<4 x i16>* %x, i32 %y) {
+; CHECK-LABEL: vwmulsu_vx_v4i32_i16_and:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
+; CHECK-NEXT:    ret
+  %a = load <4 x i16>, <4 x i16>* %x
+  %b = and i32 %y, 65535
+  %c = insertelement <4 x i32> poison, i32 %b, i32 0
+  %d = shufflevector <4 x i32> %c, <4 x i32> poison, <4 x i32> zeroinitializer
+  %e = sext <4 x i16> %a to <4 x i32>
+  %f = mul <4 x i32> %d, %e
+  ret <4 x i32> %f
+}
+
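+; The explicit zext form of the previous test; also folds to vwmulsu.vx.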
+define <4 x i32> @vwmulsu_vx_v4i32_i16_zext(<4 x i16>* %x, i16 %y) {
+; CHECK-LABEL: vwmulsu_vx_v4i32_i16_zext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
+; CHECK-NEXT:    ret
+  %a = load <4 x i16>, <4 x i16>* %x
+  %b = zext i16 %y to i32
+  %c = insertelement <4 x i32> poison, i32 %b, i32 0
+  %d = shufflevector <4 x i32> %c, <4 x i32> poison, <4 x i32> zeroinitializer
+  %e = sext <4 x i16> %a to <4 x i32>
+  %f = mul <4 x i32> %d, %e
+  ret <4 x i32> %f
+}