@@ -397,21 +397,20 @@ define double @fneg_xor_select_f64(i1 %cond, double %arg0, double %arg1) {
397
397
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
398
398
; GCN-NEXT: v_and_b32_e32 v0, 1, v0
399
399
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
400
- ; GCN-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
401
400
; GCN-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
402
- ; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v2
401
+ ; GCN-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
402
+ ; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
403
403
; GCN-NEXT: s_setpc_b64 s[30:31]
404
404
;
405
405
; GFX11-LABEL: fneg_xor_select_f64:
406
406
; GFX11: ; %bb.0:
407
407
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
408
408
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
409
409
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
410
- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
410
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
411
411
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
412
- ; GFX11-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc_lo
413
- ; GFX11-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc_lo
414
- ; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v2
412
+ ; GFX11-NEXT: v_dual_cndmask_b32 v0, v3, v1 :: v_dual_cndmask_b32 v1, v4, v2
413
+ ; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
415
414
; GFX11-NEXT: s_setpc_b64 s[30:31]
416
415
%select = select i1 %cond, double %arg0, double %arg1
417
416
%fneg = fneg double %select
@@ -501,28 +500,29 @@ define double @select_fneg_select_fneg_f64(i1 %cond0, i1 %cond1, double %arg0, d
501
500
; GCN-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
502
501
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
503
502
; GCN-NEXT: v_and_b32_e32 v1, 1, v1
504
- ; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
505
503
; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc
506
- ; GCN-NEXT: v_xor_b32_e32 v2, 0x80000000, v3
504
+ ; GCN-NEXT: v_cndmask_b32_e32 v2, v3, v5, vcc
505
+ ; GCN-NEXT: v_xor_b32_e32 v3, 0x80000000, v2
507
506
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
508
- ; GCN-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
507
+ ; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
509
508
; GCN-NEXT: s_setpc_b64 s[30:31]
510
509
;
511
510
; GFX11-LABEL: select_fneg_select_fneg_f64:
512
511
; GFX11: ; %bb.0:
513
512
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
514
513
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
515
- ; GFX11-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
516
514
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
517
- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_4)
515
+ ; GFX11-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
516
+ ; GFX11-NEXT: v_and_b32_e32 v1, 1, v1
517
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
518
518
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
519
- ; GFX11-NEXT: v_dual_cndmask_b32 v0, v2, v4 :: v_dual_and_b32 v1, 1, v1
520
- ; GFX11-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc_lo
521
- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
519
+ ; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc_lo
520
+ ; GFX11-NEXT: v_cndmask_b32_e32 v2, v3, v5, vcc_lo
521
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
522
522
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
523
- ; GFX11-NEXT: v_xor_b32_e32 v5, 0x80000000, v3
523
+ ; GFX11-NEXT: v_xor_b32_e32 v3, 0x80000000, v2
524
524
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
525
- ; GFX11-NEXT: v_cndmask_b32_e32 v1, v3, v5, vcc_lo
525
+ ; GFX11-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo
526
526
; GFX11-NEXT: s_setpc_b64 s[30:31]
527
527
%fneg0 = fneg double %arg0
528
528
%select0 = select i1 %cond0, double %arg1, double %fneg0
@@ -893,12 +893,12 @@ define double @cospiD_pattern1(i32 %arg, double %arg1, double %arg2) {
893
893
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
894
894
; GCN-NEXT: v_and_b32_e32 v5, 1, v0
895
895
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
896
- ; GCN-NEXT: v_cndmask_b32_e32 v4, v2, v4, vcc
897
- ; GCN-NEXT: v_cndmask_b32_e32 v2, v1, v3, vcc
898
- ; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v4
896
+ ; GCN-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc
897
+ ; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc
898
+ ; GCN-NEXT: v_xor_b32_e32 v2, 0x80000000, v1
899
899
; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 1, v0
900
- ; GCN-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
901
- ; GCN-NEXT: v_mov_b32_e32 v0, v2
900
+ ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
901
+ ; GCN-NEXT: v_mov_b32_e32 v0, v3
902
902
; GCN-NEXT: s_setpc_b64 s[30:31]
903
903
;
904
904
; GFX11-LABEL: cospiD_pattern1:
@@ -908,12 +908,13 @@ define double @cospiD_pattern1(i32 %arg, double %arg1, double %arg2) {
908
908
; GFX11-NEXT: v_and_b32_e32 v5, 1, v0
909
909
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_3)
910
910
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v5
911
- ; GFX11-NEXT: v_cndmask_b32_e32 v4, v2, v4, vcc_lo
912
- ; GFX11-NEXT: v_cndmask_b32_e32 v2, v1, v3, vcc_lo
911
+ ; GFX11-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc_lo
912
+ ; GFX11-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo
913
913
; GFX11-NEXT: v_cmp_lt_i32_e32 vcc_lo, 1, v0
914
- ; GFX11-NEXT: v_xor_b32_e32 v5, 0x80000000, v4
915
- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
916
- ; GFX11-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_cndmask_b32 v1, v4, v5
914
+ ; GFX11-NEXT: v_mov_b32_e32 v0, v3
915
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
916
+ ; GFX11-NEXT: v_xor_b32_e32 v2, 0x80000000, v1
917
+ ; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo
917
918
; GFX11-NEXT: s_setpc_b64 s[30:31]
918
919
%i = and i32 %arg, 1
919
920
%i3 = icmp eq i32 %i, 0
@@ -1390,17 +1391,14 @@ define double @fneg_f64_bitcast_build_vector_v2f32_foldable_sources_to_f64(float
1390
1391
; GCN-LABEL: fneg_f64_bitcast_build_vector_v2f32_foldable_sources_to_f64:
1391
1392
; GCN: ; %bb.0:
1392
1393
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1393
- ; GCN-NEXT: v_add_f32_e32 v1, 2.0, v1
1394
- ; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
1394
+ ; GCN-NEXT: v_sub_f32_e32 v1, -2.0, v1
1395
1395
; GCN-NEXT: s_setpc_b64 s[30:31]
1396
1396
;
1397
1397
; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2f32_foldable_sources_to_f64:
1398
1398
; GFX11: ; %bb.0:
1399
1399
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1400
1400
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1401
- ; GFX11-NEXT: v_add_f32_e32 v1, 2.0, v1
1402
- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1403
- ; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
1401
+ ; GFX11-NEXT: v_sub_f32_e32 v1, -2.0, v1
1404
1402
; GFX11-NEXT: s_setpc_b64 s[30:31]
1405
1403
%fadd = fadd nsz nnan float %elt1, 2.0
1406
1404
%insert.0 = insertelement <2 x float> poison, float %elt0, i32 0
0 commit comments