Skip to content

Commit ce63cd3

Browse files
committed
[DAG] Fold freeze(concat_vectors(x,y,...)) -> concat_vectors(freeze(x),freeze(y),...)
This is another of the cleanups necessary for D136529.
1 parent cbd7aaa commit ce63cd3

File tree

4 files changed

+87
-96
lines changed

4 files changed

+87
-96
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14711,7 +14711,8 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
1471114711
return SDValue();
1471214712

1471314713
bool AllowMultipleMaybePoisonOperands = N0.getOpcode() == ISD::BUILD_VECTOR ||
14714-
N0.getOpcode() == ISD::BUILD_PAIR;
14714+
N0.getOpcode() == ISD::BUILD_PAIR ||
14715+
N0.getOpcode() == ISD::CONCAT_VECTORS;
1471514716

1471614717
SmallSetVector<SDValue, 8> MaybePoisonOperands;
1471714718
for (SDValue Op : N0->ops()) {

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4758,6 +4758,7 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
47584758
case ISD::AssertSext:
47594759
case ISD::AssertZext:
47604760
case ISD::FREEZE:
4761+
case ISD::CONCAT_VECTORS:
47614762
case ISD::INSERT_SUBVECTOR:
47624763
case ISD::AND:
47634764
case ISD::OR:

llvm/test/CodeGen/X86/avx512-broadcast-arith.ll

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,15 +30,14 @@ define <64 x i8> @add_v64i8_broadcasts(<64 x i8> %a0, i64 %a1, i8 %a2) {
3030
; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
3131
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
3232
; AVX512F-NEXT: vpternlogq $216, %zmm2, %zmm1, %zmm0
33-
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
34-
; AVX512F-NEXT: vpaddb %ymm1, %ymm3, %ymm3
33+
; AVX512F-NEXT: vpaddb %ymm1, %ymm0, %ymm3
34+
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
3535
; AVX512F-NEXT: vpaddb %ymm1, %ymm0, %ymm0
36-
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm3
37-
; AVX512F-NEXT: vextracti64x4 $1, %zmm3, %ymm0
36+
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm4
3837
; AVX512F-NEXT: vpaddb %ymm1, %ymm0, %ymm0
3938
; AVX512F-NEXT: vpaddb %ymm1, %ymm3, %ymm1
4039
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
41-
; AVX512F-NEXT: vpternlogq $226, %zmm3, %zmm2, %zmm0
40+
; AVX512F-NEXT: vpternlogq $226, %zmm4, %zmm2, %zmm0
4241
; AVX512F-NEXT: retq
4342
;
4443
; AVX512BW-LABEL: add_v64i8_broadcasts:

llvm/test/CodeGen/X86/midpoint-int-vec-512.ll

Lines changed: 80 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -283,17 +283,16 @@ define <32 x i16> @vec512_i16_signed_reg_reg(<32 x i16> %a1, <32 x i16> %a2) nou
283283
; AVX512F-NEXT: vpsubw %ymm5, %ymm1, %ymm1
284284
; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1
285285
; AVX512F-NEXT: vpsrlw $1, %ymm2, %ymm2
286+
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm5
287+
; AVX512F-NEXT: vpxor %xmm6, %xmm6, %xmm6
288+
; AVX512F-NEXT: vpsubw %ymm2, %ymm6, %ymm2
289+
; AVX512F-NEXT: vpsubw %ymm1, %ymm6, %ymm1
286290
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
291+
; AVX512F-NEXT: vpternlogq $226, %zmm5, %zmm4, %zmm1
287292
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
288-
; AVX512F-NEXT: vpxor %xmm5, %xmm5, %xmm5
289-
; AVX512F-NEXT: vpsubw %ymm2, %ymm5, %ymm2
290-
; AVX512F-NEXT: vpsubw %ymm1, %ymm5, %ymm5
291-
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm5, %zmm2
292-
; AVX512F-NEXT: vpternlogq $226, %zmm1, %zmm4, %zmm2
293-
; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm1
294-
; AVX512F-NEXT: vpaddw %ymm3, %ymm1, %ymm1
295-
; AVX512F-NEXT: vpaddw %ymm0, %ymm2, %ymm0
296-
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
293+
; AVX512F-NEXT: vpaddw %ymm3, %ymm2, %ymm2
294+
; AVX512F-NEXT: vpaddw %ymm0, %ymm1, %ymm0
295+
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
297296
; AVX512F-NEXT: retq
298297
;
299298
; AVX512VL-FALLBACK-LABEL: vec512_i16_signed_reg_reg:
@@ -311,17 +310,16 @@ define <32 x i16> @vec512_i16_signed_reg_reg(<32 x i16> %a1, <32 x i16> %a2) nou
311310
; AVX512VL-FALLBACK-NEXT: vpsubw %ymm5, %ymm1, %ymm1
312311
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm1, %ymm1
313312
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm2, %ymm2
313+
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm5
314+
; AVX512VL-FALLBACK-NEXT: vpxor %xmm6, %xmm6, %xmm6
315+
; AVX512VL-FALLBACK-NEXT: vpsubw %ymm2, %ymm6, %ymm2
316+
; AVX512VL-FALLBACK-NEXT: vpsubw %ymm1, %ymm6, %ymm1
314317
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
318+
; AVX512VL-FALLBACK-NEXT: vpternlogq $226, %zmm5, %zmm4, %zmm1
315319
; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm1, %ymm2
316-
; AVX512VL-FALLBACK-NEXT: vpxor %xmm5, %xmm5, %xmm5
317-
; AVX512VL-FALLBACK-NEXT: vpsubw %ymm2, %ymm5, %ymm2
318-
; AVX512VL-FALLBACK-NEXT: vpsubw %ymm1, %ymm5, %ymm5
319-
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm5, %zmm2
320-
; AVX512VL-FALLBACK-NEXT: vpternlogq $226, %zmm1, %zmm4, %zmm2
321-
; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm2, %ymm1
322-
; AVX512VL-FALLBACK-NEXT: vpaddw %ymm3, %ymm1, %ymm1
323-
; AVX512VL-FALLBACK-NEXT: vpaddw %ymm0, %ymm2, %ymm0
324-
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
320+
; AVX512VL-FALLBACK-NEXT: vpaddw %ymm3, %ymm2, %ymm2
321+
; AVX512VL-FALLBACK-NEXT: vpaddw %ymm0, %ymm1, %ymm0
322+
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
325323
; AVX512VL-FALLBACK-NEXT: retq
326324
;
327325
; AVX512BW-LABEL: vec512_i16_signed_reg_reg:
@@ -362,17 +360,16 @@ define <32 x i16> @vec512_i16_unsigned_reg_reg(<32 x i16> %a1, <32 x i16> %a2) n
362360
; AVX512F-NEXT: vpsubw %ymm6, %ymm1, %ymm1
363361
; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1
364362
; AVX512F-NEXT: vpsrlw $1, %ymm2, %ymm2
363+
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm4
364+
; AVX512F-NEXT: vpxor %xmm6, %xmm6, %xmm6
365+
; AVX512F-NEXT: vpsubw %ymm2, %ymm6, %ymm2
366+
; AVX512F-NEXT: vpsubw %ymm1, %ymm6, %ymm1
365367
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
368+
; AVX512F-NEXT: vpternlogq $184, %zmm4, %zmm5, %zmm1
366369
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
367-
; AVX512F-NEXT: vpxor %xmm4, %xmm4, %xmm4
368-
; AVX512F-NEXT: vpsubw %ymm2, %ymm4, %ymm2
369-
; AVX512F-NEXT: vpsubw %ymm1, %ymm4, %ymm4
370-
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm4, %zmm2
371-
; AVX512F-NEXT: vpternlogq $216, %zmm5, %zmm1, %zmm2
372-
; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm1
373-
; AVX512F-NEXT: vpaddw %ymm3, %ymm1, %ymm1
374-
; AVX512F-NEXT: vpaddw %ymm0, %ymm2, %ymm0
375-
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
370+
; AVX512F-NEXT: vpaddw %ymm3, %ymm2, %ymm2
371+
; AVX512F-NEXT: vpaddw %ymm0, %ymm1, %ymm0
372+
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
376373
; AVX512F-NEXT: retq
377374
;
378375
; AVX512VL-FALLBACK-LABEL: vec512_i16_unsigned_reg_reg:
@@ -390,17 +387,16 @@ define <32 x i16> @vec512_i16_unsigned_reg_reg(<32 x i16> %a1, <32 x i16> %a2) n
390387
; AVX512VL-FALLBACK-NEXT: vpsubw %ymm6, %ymm1, %ymm1
391388
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm1, %ymm1
392389
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm2, %ymm2
390+
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm4
391+
; AVX512VL-FALLBACK-NEXT: vpxor %xmm6, %xmm6, %xmm6
392+
; AVX512VL-FALLBACK-NEXT: vpsubw %ymm2, %ymm6, %ymm2
393+
; AVX512VL-FALLBACK-NEXT: vpsubw %ymm1, %ymm6, %ymm1
393394
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
395+
; AVX512VL-FALLBACK-NEXT: vpternlogq $184, %zmm4, %zmm5, %zmm1
394396
; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm1, %ymm2
395-
; AVX512VL-FALLBACK-NEXT: vpxor %xmm4, %xmm4, %xmm4
396-
; AVX512VL-FALLBACK-NEXT: vpsubw %ymm2, %ymm4, %ymm2
397-
; AVX512VL-FALLBACK-NEXT: vpsubw %ymm1, %ymm4, %ymm4
398-
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm4, %zmm2
399-
; AVX512VL-FALLBACK-NEXT: vpternlogq $216, %zmm5, %zmm1, %zmm2
400-
; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm2, %ymm1
401-
; AVX512VL-FALLBACK-NEXT: vpaddw %ymm3, %ymm1, %ymm1
402-
; AVX512VL-FALLBACK-NEXT: vpaddw %ymm0, %ymm2, %ymm0
403-
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
397+
; AVX512VL-FALLBACK-NEXT: vpaddw %ymm3, %ymm2, %ymm2
398+
; AVX512VL-FALLBACK-NEXT: vpaddw %ymm0, %ymm1, %ymm0
399+
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
404400
; AVX512VL-FALLBACK-NEXT: retq
405401
;
406402
; AVX512BW-LABEL: vec512_i16_unsigned_reg_reg:
@@ -444,17 +440,16 @@ define <32 x i16> @vec512_i16_signed_mem_reg(ptr %a1_addr, <32 x i16> %a2) nounw
444440
; AVX512F-NEXT: vpsubw %ymm5, %ymm0, %ymm0
445441
; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm0
446442
; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1
443+
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm5
444+
; AVX512F-NEXT: vpxor %xmm6, %xmm6, %xmm6
445+
; AVX512F-NEXT: vpsubw %ymm1, %ymm6, %ymm1
446+
; AVX512F-NEXT: vpsubw %ymm0, %ymm6, %ymm0
447447
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
448+
; AVX512F-NEXT: vpternlogq $226, %zmm5, %zmm4, %zmm0
448449
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
449-
; AVX512F-NEXT: vpxor %xmm5, %xmm5, %xmm5
450-
; AVX512F-NEXT: vpsubw %ymm1, %ymm5, %ymm1
451-
; AVX512F-NEXT: vpsubw %ymm0, %ymm5, %ymm5
452-
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm5, %zmm1
453-
; AVX512F-NEXT: vpternlogq $226, %zmm0, %zmm4, %zmm1
454-
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm0
455-
; AVX512F-NEXT: vpaddw %ymm3, %ymm0, %ymm0
456-
; AVX512F-NEXT: vpaddw %ymm2, %ymm1, %ymm1
457-
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
450+
; AVX512F-NEXT: vpaddw %ymm3, %ymm1, %ymm1
451+
; AVX512F-NEXT: vpaddw %ymm2, %ymm0, %ymm0
452+
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
458453
; AVX512F-NEXT: retq
459454
;
460455
; AVX512VL-FALLBACK-LABEL: vec512_i16_signed_mem_reg:
@@ -473,17 +468,16 @@ define <32 x i16> @vec512_i16_signed_mem_reg(ptr %a1_addr, <32 x i16> %a2) nounw
473468
; AVX512VL-FALLBACK-NEXT: vpsubw %ymm5, %ymm0, %ymm0
474469
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm0, %ymm0
475470
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm1, %ymm1
471+
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm5
472+
; AVX512VL-FALLBACK-NEXT: vpxor %xmm6, %xmm6, %xmm6
473+
; AVX512VL-FALLBACK-NEXT: vpsubw %ymm1, %ymm6, %ymm1
474+
; AVX512VL-FALLBACK-NEXT: vpsubw %ymm0, %ymm6, %ymm0
476475
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
476+
; AVX512VL-FALLBACK-NEXT: vpternlogq $226, %zmm5, %zmm4, %zmm0
477477
; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm0, %ymm1
478-
; AVX512VL-FALLBACK-NEXT: vpxor %xmm5, %xmm5, %xmm5
479-
; AVX512VL-FALLBACK-NEXT: vpsubw %ymm1, %ymm5, %ymm1
480-
; AVX512VL-FALLBACK-NEXT: vpsubw %ymm0, %ymm5, %ymm5
481-
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm1, %zmm5, %zmm1
482-
; AVX512VL-FALLBACK-NEXT: vpternlogq $226, %zmm0, %zmm4, %zmm1
483-
; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm1, %ymm0
484-
; AVX512VL-FALLBACK-NEXT: vpaddw %ymm3, %ymm0, %ymm0
485-
; AVX512VL-FALLBACK-NEXT: vpaddw %ymm2, %ymm1, %ymm1
486-
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
478+
; AVX512VL-FALLBACK-NEXT: vpaddw %ymm3, %ymm1, %ymm1
479+
; AVX512VL-FALLBACK-NEXT: vpaddw %ymm2, %ymm0, %ymm0
480+
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
487481
; AVX512VL-FALLBACK-NEXT: retq
488482
;
489483
; AVX512BW-LABEL: vec512_i16_signed_mem_reg:
@@ -527,17 +521,16 @@ define <32 x i16> @vec512_i16_signed_reg_mem(<32 x i16> %a1, ptr %a2_addr) nounw
527521
; AVX512F-NEXT: vpsubw %ymm5, %ymm1, %ymm1
528522
; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1
529523
; AVX512F-NEXT: vpsrlw $1, %ymm2, %ymm2
524+
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm5
525+
; AVX512F-NEXT: vpxor %xmm6, %xmm6, %xmm6
526+
; AVX512F-NEXT: vpsubw %ymm2, %ymm6, %ymm2
527+
; AVX512F-NEXT: vpsubw %ymm1, %ymm6, %ymm1
530528
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
529+
; AVX512F-NEXT: vpternlogq $226, %zmm5, %zmm4, %zmm1
531530
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
532-
; AVX512F-NEXT: vpxor %xmm5, %xmm5, %xmm5
533-
; AVX512F-NEXT: vpsubw %ymm2, %ymm5, %ymm2
534-
; AVX512F-NEXT: vpsubw %ymm1, %ymm5, %ymm5
535-
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm5, %zmm2
536-
; AVX512F-NEXT: vpternlogq $226, %zmm1, %zmm4, %zmm2
537-
; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm1
538-
; AVX512F-NEXT: vpaddw %ymm3, %ymm1, %ymm1
539-
; AVX512F-NEXT: vpaddw %ymm0, %ymm2, %ymm0
540-
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
531+
; AVX512F-NEXT: vpaddw %ymm3, %ymm2, %ymm2
532+
; AVX512F-NEXT: vpaddw %ymm0, %ymm1, %ymm0
533+
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
541534
; AVX512F-NEXT: retq
542535
;
543536
; AVX512VL-FALLBACK-LABEL: vec512_i16_signed_reg_mem:
@@ -556,17 +549,16 @@ define <32 x i16> @vec512_i16_signed_reg_mem(<32 x i16> %a1, ptr %a2_addr) nounw
556549
; AVX512VL-FALLBACK-NEXT: vpsubw %ymm5, %ymm1, %ymm1
557550
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm1, %ymm1
558551
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm2, %ymm2
552+
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm5
553+
; AVX512VL-FALLBACK-NEXT: vpxor %xmm6, %xmm6, %xmm6
554+
; AVX512VL-FALLBACK-NEXT: vpsubw %ymm2, %ymm6, %ymm2
555+
; AVX512VL-FALLBACK-NEXT: vpsubw %ymm1, %ymm6, %ymm1
559556
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
557+
; AVX512VL-FALLBACK-NEXT: vpternlogq $226, %zmm5, %zmm4, %zmm1
560558
; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm1, %ymm2
561-
; AVX512VL-FALLBACK-NEXT: vpxor %xmm5, %xmm5, %xmm5
562-
; AVX512VL-FALLBACK-NEXT: vpsubw %ymm2, %ymm5, %ymm2
563-
; AVX512VL-FALLBACK-NEXT: vpsubw %ymm1, %ymm5, %ymm5
564-
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm5, %zmm2
565-
; AVX512VL-FALLBACK-NEXT: vpternlogq $226, %zmm1, %zmm4, %zmm2
566-
; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm2, %ymm1
567-
; AVX512VL-FALLBACK-NEXT: vpaddw %ymm3, %ymm1, %ymm1
568-
; AVX512VL-FALLBACK-NEXT: vpaddw %ymm0, %ymm2, %ymm0
569-
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
559+
; AVX512VL-FALLBACK-NEXT: vpaddw %ymm3, %ymm2, %ymm2
560+
; AVX512VL-FALLBACK-NEXT: vpaddw %ymm0, %ymm1, %ymm0
561+
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
570562
; AVX512VL-FALLBACK-NEXT: retq
571563
;
572564
; AVX512BW-LABEL: vec512_i16_signed_reg_mem:
@@ -611,17 +603,16 @@ define <32 x i16> @vec512_i16_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwin
611603
; AVX512F-NEXT: vpsubw %ymm5, %ymm0, %ymm0
612604
; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm0
613605
; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1
606+
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm5
607+
; AVX512F-NEXT: vpxor %xmm6, %xmm6, %xmm6
608+
; AVX512F-NEXT: vpsubw %ymm1, %ymm6, %ymm1
609+
; AVX512F-NEXT: vpsubw %ymm0, %ymm6, %ymm0
614610
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
611+
; AVX512F-NEXT: vpternlogq $226, %zmm5, %zmm4, %zmm0
615612
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
616-
; AVX512F-NEXT: vpxor %xmm5, %xmm5, %xmm5
617-
; AVX512F-NEXT: vpsubw %ymm1, %ymm5, %ymm1
618-
; AVX512F-NEXT: vpsubw %ymm0, %ymm5, %ymm5
619-
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm5, %zmm1
620-
; AVX512F-NEXT: vpternlogq $226, %zmm0, %zmm4, %zmm1
621-
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm0
622-
; AVX512F-NEXT: vpaddw %ymm3, %ymm0, %ymm0
623-
; AVX512F-NEXT: vpaddw %ymm2, %ymm1, %ymm1
624-
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
613+
; AVX512F-NEXT: vpaddw %ymm3, %ymm1, %ymm1
614+
; AVX512F-NEXT: vpaddw %ymm2, %ymm0, %ymm0
615+
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
625616
; AVX512F-NEXT: retq
626617
;
627618
; AVX512VL-FALLBACK-LABEL: vec512_i16_signed_mem_mem:
@@ -641,17 +632,16 @@ define <32 x i16> @vec512_i16_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwin
641632
; AVX512VL-FALLBACK-NEXT: vpsubw %ymm5, %ymm0, %ymm0
642633
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm0, %ymm0
643634
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm1, %ymm1
635+
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm5
636+
; AVX512VL-FALLBACK-NEXT: vpxor %xmm6, %xmm6, %xmm6
637+
; AVX512VL-FALLBACK-NEXT: vpsubw %ymm1, %ymm6, %ymm1
638+
; AVX512VL-FALLBACK-NEXT: vpsubw %ymm0, %ymm6, %ymm0
644639
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
640+
; AVX512VL-FALLBACK-NEXT: vpternlogq $226, %zmm5, %zmm4, %zmm0
645641
; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm0, %ymm1
646-
; AVX512VL-FALLBACK-NEXT: vpxor %xmm5, %xmm5, %xmm5
647-
; AVX512VL-FALLBACK-NEXT: vpsubw %ymm1, %ymm5, %ymm1
648-
; AVX512VL-FALLBACK-NEXT: vpsubw %ymm0, %ymm5, %ymm5
649-
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm1, %zmm5, %zmm1
650-
; AVX512VL-FALLBACK-NEXT: vpternlogq $226, %zmm0, %zmm4, %zmm1
651-
; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm1, %ymm0
652-
; AVX512VL-FALLBACK-NEXT: vpaddw %ymm3, %ymm0, %ymm0
653-
; AVX512VL-FALLBACK-NEXT: vpaddw %ymm2, %ymm1, %ymm1
654-
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
642+
; AVX512VL-FALLBACK-NEXT: vpaddw %ymm3, %ymm1, %ymm1
643+
; AVX512VL-FALLBACK-NEXT: vpaddw %ymm2, %ymm0, %ymm0
644+
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
655645
; AVX512VL-FALLBACK-NEXT: retq
656646
;
657647
; AVX512BW-LABEL: vec512_i16_signed_mem_mem:

0 commit comments

Comments
 (0)