@@ -567,22 +567,19 @@ define <8 x float> @buildvector_mul_subadd_ps256(<8 x float> %C, <8 x float> %D,
567
567
;
568
568
; SSE4-LABEL: @buildvector_mul_subadd_ps256(
569
569
; SSE4-NEXT: [[A:%.*]] = fmul <8 x float> [[C:%.*]], [[D:%.*]]
570
- ; SSE4-NEXT: [[TMP0:%.*]] = fsub <8 x float> [[A]], [[B:%.*]]
571
- ; SSE4-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
572
- ; SSE4-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B]]
570
+ ; SSE4-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B:%.*]]
573
571
; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
574
- ; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
575
- ; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
576
- ; SSE4-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
572
+ ; SSE4-NEXT: [[TMP5:%.*]] = fsub <8 x float> [[A]], [[B]]
573
+ ; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
574
+ ; SSE4-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> [[TMP4]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
577
575
; SSE4-NEXT: ret <8 x float> [[TMP6]]
578
576
;
579
577
; AVX_FMA4-LABEL: @buildvector_mul_subadd_ps256(
580
578
; AVX_FMA4-NEXT: [[A:%.*]] = fmul <8 x float> [[C:%.*]], [[D:%.*]]
581
- ; AVX_FMA4-NEXT: [[TMP0:%.*]] = fsub <8 x float> [[A]], [[B:%.*]]
582
- ; AVX_FMA4-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
583
- ; AVX_FMA4-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B]]
579
+ ; AVX_FMA4-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B:%.*]]
584
580
; AVX_FMA4-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
585
- ; AVX_FMA4-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
581
+ ; AVX_FMA4-NEXT: [[TMP7:%.*]] = fsub <8 x float> [[A]], [[B]]
582
+ ; AVX_FMA4-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
586
583
; AVX_FMA4-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
587
584
; AVX_FMA4-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
588
585
; AVX_FMA4-NEXT: ret <8 x float> [[TMP6]]
@@ -677,13 +674,11 @@ define <16 x float> @buildvector_mul_subadd_ps512(<16 x float> %C, <16 x float>
677
674
;
678
675
; AVX_FMA-LABEL: @buildvector_mul_subadd_ps512(
679
676
; AVX_FMA-NEXT: [[A:%.*]] = fmul <16 x float> [[C:%.*]], [[D:%.*]]
680
- ; AVX_FMA-NEXT: [[TMP1:%.*]] = fsub <16 x float> [[A]], [[B:%.*]]
681
- ; AVX_FMA-NEXT: [[TMP2:%.*]] = shufflevector <16 x float> [[TMP1]], <16 x float> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
682
- ; AVX_FMA-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[A]], [[B]]
677
+ ; AVX_FMA-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[A]], [[B:%.*]]
683
678
; AVX_FMA-NEXT: [[TMP4:%.*]] = shufflevector <16 x float> [[TMP3]], <16 x float> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
684
- ; AVX_FMA-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
685
- ; AVX_FMA-NEXT: [[TMP6:%.*]] = shufflevector <16 x float> [[TMP4]], <16 x float> [[TMP5]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
686
- ; AVX_FMA-NEXT: [[TMP7:%.*]] = shufflevector <16 x float> [[TMP6]], <16 x float> poison, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
679
+ ; AVX_FMA-NEXT: [[TMP5:%.*]] = fsub <16 x float> [[A]], [[B]]
680
+ ; AVX_FMA-NEXT: [[TMP6:%.*]] = shufflevector <16 x float> [[TMP5]], <16 x float> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
681
+ ; AVX_FMA-NEXT: [[TMP7:%.*]] = shufflevector <16 x float> [[TMP4]], <16 x float> [[TMP6]], <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
687
682
; AVX_FMA-NEXT: ret <16 x float> [[TMP7]]
688
683
;
689
684
; AVX512-LABEL: @buildvector_mul_subadd_ps512(
@@ -880,13 +875,11 @@ define <8 x double> @buildvector_mul_subadd_pd512(<8 x double> %C, <8 x double>
880
875
;
881
876
; AVX_FMA-LABEL: @buildvector_mul_subadd_pd512(
882
877
; AVX_FMA-NEXT: [[A:%.*]] = fmul <8 x double> [[C:%.*]], [[D:%.*]]
883
- ; AVX_FMA-NEXT: [[TMP1:%.*]] = fsub <8 x double> [[A]], [[B:%.*]]
884
- ; AVX_FMA-NEXT: [[TMP2:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
885
- ; AVX_FMA-NEXT: [[TMP3:%.*]] = fadd <8 x double> [[A]], [[B]]
878
+ ; AVX_FMA-NEXT: [[TMP3:%.*]] = fadd <8 x double> [[A]], [[B:%.*]]
886
879
; AVX_FMA-NEXT: [[TMP4:%.*]] = shufflevector <8 x double> [[TMP3]], <8 x double> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
887
- ; AVX_FMA-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
888
- ; AVX_FMA-NEXT: [[TMP6:%.*]] = shufflevector <8 x double> [[TMP4]], <8 x double> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
889
- ; AVX_FMA-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
880
+ ; AVX_FMA-NEXT: [[TMP5:%.*]] = fsub <8 x double> [[A]], [[B]]
881
+ ; AVX_FMA-NEXT: [[TMP6:%.*]] = shufflevector <8 x double> [[TMP5]], <8 x double> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
882
+ ; AVX_FMA-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP4]], <8 x double> [[TMP6]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
890
883
; AVX_FMA-NEXT: ret <8 x double> [[TMP7]]
891
884
;
892
885
; AVX512-LABEL: @buildvector_mul_subadd_pd512(
0 commit comments