Skip to content

[AMDGPU] Add KnownBits simplification combines to RegBankCombiner #141591

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: users/pierre-vh/rbcomb-bfx
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions llvm/lib/Target/AMDGPU/AMDGPUCombine.td
Original file line number Diff line number Diff line change
Expand Up @@ -205,10 +205,10 @@ def AMDGPUPostLegalizerCombiner: GICombiner<

 def AMDGPURegBankCombiner : GICombiner<
 "AMDGPURegBankCombinerImpl",
-[unmerge_merge, unmerge_cst, unmerge_undef,
-zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain,
-fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp,
-identity_combines, redundant_and, constant_fold_cast_op,
+[unmerge_merge, unmerge_cst, unmerge_undef, int_minmax_to_med3,
+ptr_add_immed_chain, fp_minmax_to_clamp, fp_minmax_to_med3,
+fmed3_intrinsic_to_clamp, identity_combines, constant_fold_cast_op,
 cast_of_cast_combines, sext_trunc, zext_of_shift_amount_combines,
-lower_uniform_sbfx, lower_uniform_ubfx, form_bitfield_extract]> {
+lower_uniform_sbfx, lower_uniform_ubfx, form_bitfield_extract,
+known_bits_simplifications]> {
 }
59 changes: 30 additions & 29 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1744,63 +1744,64 @@ define i65 @v_lshr_i65_33(i65 %value) {
 ; GFX6-LABEL: v_lshr_i65_33:
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_mov_b32_e32 v3, v1
-; GFX6-NEXT: v_mov_b32_e32 v0, 1
+; GFX6-NEXT: v_mov_b32_e32 v3, 1
+; GFX6-NEXT: v_mov_b32_e32 v4, 0
+; GFX6-NEXT: v_and_b32_e32 v3, 1, v2
+; GFX6-NEXT: v_lshl_b64 v[2:3], v[3:4], 31
+; GFX6-NEXT: v_lshrrev_b32_e32 v0, 1, v1
+; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
 ; GFX6-NEXT: v_mov_b32_e32 v1, 0
-; GFX6-NEXT: v_and_b32_e32 v0, 1, v2
-; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 31
-; GFX6-NEXT: v_lshrrev_b32_e32 v2, 1, v3
-; GFX6-NEXT: v_or_b32_e32 v0, v2, v0
 ; GFX6-NEXT: v_mov_b32_e32 v2, 0
 ; GFX6-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_lshr_i65_33:
 ; GFX8: ; %bb.0:
 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mov_b32_e32 v3, v1
-; GFX8-NEXT: v_mov_b32_e32 v0, 1
+; GFX8-NEXT: v_mov_b32_e32 v3, 1
+; GFX8-NEXT: v_mov_b32_e32 v4, 0
+; GFX8-NEXT: v_and_b32_e32 v3, 1, v2
+; GFX8-NEXT: v_lshlrev_b64 v[2:3], 31, v[3:4]
+; GFX8-NEXT: v_lshrrev_b32_e32 v0, 1, v1
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
 ; GFX8-NEXT: v_mov_b32_e32 v1, 0
-; GFX8-NEXT: v_and_b32_e32 v0, 1, v2
-; GFX8-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
-; GFX8-NEXT: v_lshrrev_b32_e32 v2, 1, v3
-; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
 ; GFX8-NEXT: v_mov_b32_e32 v2, 0
 ; GFX8-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_lshr_i65_33:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v3, v1
-; GFX9-NEXT: v_mov_b32_e32 v0, 1
+; GFX9-NEXT: v_mov_b32_e32 v3, 1
+; GFX9-NEXT: v_mov_b32_e32 v4, 0
+; GFX9-NEXT: v_and_b32_e32 v3, 1, v2
+; GFX9-NEXT: v_lshlrev_b64 v[2:3], 31, v[3:4]
+; GFX9-NEXT: v_lshrrev_b32_e32 v0, 1, v1
+; GFX9-NEXT: v_or_b32_e32 v0, v0, v2
 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_and_b32_e32 v0, 1, v2
-; GFX9-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
-; GFX9-NEXT: v_lshrrev_b32_e32 v2, 1, v3
-; GFX9-NEXT: v_or_b32_e32 v0, v2, v0
 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
 ; GFX9-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_lshr_i65_33:
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_mov_b32_e32 v3, v1
-; GFX10-NEXT: v_mov_b32_e32 v0, 1
+; GFX10-NEXT: v_mov_b32_e32 v3, 1
+; GFX10-NEXT: v_mov_b32_e32 v4, 0
+; GFX10-NEXT: v_and_b32_e32 v3, 1, v2
+; GFX10-NEXT: v_lshrrev_b32_e32 v0, 1, v1
 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
-; GFX10-NEXT: v_and_b32_e32 v0, 1, v2
-; GFX10-NEXT: v_lshrrev_b32_e32 v2, 1, v3
-; GFX10-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
-; GFX10-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX10-NEXT: v_lshlrev_b64 v[2:3], 31, v[3:4]
+; GFX10-NEXT: v_or_b32_e32 v0, v0, v2
 ; GFX10-NEXT: v_mov_b32_e32 v2, 0
 ; GFX10-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_lshr_i65_33:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v0, 1
-; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 1, v2
-; GFX11-NEXT: v_lshrrev_b32_e32 v2, 1, v3
-; GFX11-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
-; GFX11-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX11-NEXT: v_mov_b32_e32 v3, 1
+; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_and_b32 v3, 1, v2
+; GFX11-NEXT: v_lshrrev_b32_e32 v0, 1, v1
+; GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GFX11-NEXT: v_lshlrev_b64 v[2:3], 31, v[3:4]
+; GFX11-NEXT: v_or_b32_e32 v0, v0, v2
 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
 ; GFX11-NEXT: s_setpc_b64 s[30:31]
 %result = lshr i65 %value, 33
Expand Down
61 changes: 21 additions & 40 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -80,11 +80,10 @@ define amdgpu_ps i7 @s_saddsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
; GFX8-NEXT: s_min_i32 s2, s2, 0
; GFX8-NEXT: s_lshl_b32 s1, s1, 9
; GFX8-NEXT: s_sub_i32 s2, 0x8000, s2
; GFX8-NEXT: s_sub_i32 s3, 0x7fff, s3
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sub_i32 s3, 0x7fff, s3
; GFX8-NEXT: s_max_i32 s1, s2, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s2, s3
; GFX8-NEXT: s_min_i32 s1, s1, s2
; GFX8-NEXT: s_add_i32 s0, s0, s1
Expand Down Expand Up @@ -189,11 +188,10 @@ define amdgpu_ps i8 @s_saddsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
; GFX8-NEXT: s_min_i32 s2, s2, 0
; GFX8-NEXT: s_lshl_b32 s1, s1, 8
; GFX8-NEXT: s_sub_i32 s2, 0x8000, s2
; GFX8-NEXT: s_sub_i32 s3, 0x7fff, s3
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sub_i32 s3, 0x7fff, s3
; GFX8-NEXT: s_max_i32 s1, s2, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s2, s3
; GFX8-NEXT: s_min_i32 s1, s1, s2
; GFX8-NEXT: s_add_i32 s0, s0, s1
Expand Down Expand Up @@ -386,11 +384,10 @@ define amdgpu_ps i16 @s_saddsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
; GFX8-NEXT: s_lshr_b32 s3, s1, 8
; GFX8-NEXT: s_lshl_b32 s1, s1, 8
; GFX8-NEXT: s_sub_i32 s4, 0x8000, s4
; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
; GFX8-NEXT: s_max_i32 s1, s4, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s4, s5
; GFX8-NEXT: s_min_i32 s1, s1, s4
; GFX8-NEXT: s_add_i32 s0, s0, s1
Expand All @@ -400,11 +397,10 @@ define amdgpu_ps i16 @s_saddsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
; GFX8-NEXT: s_max_i32 s4, s3, 0
; GFX8-NEXT: s_min_i32 s3, s3, 0
; GFX8-NEXT: s_sub_i32 s3, 0x8000, s3
; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4
; GFX8-NEXT: s_max_i32 s2, s3, s2
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s3, s4
; GFX8-NEXT: s_min_i32 s2, s2, s3
; GFX8-NEXT: s_add_i32 s1, s1, s2
Expand Down Expand Up @@ -787,11 +783,10 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX8-NEXT: s_lshr_b32 s7, s1, 24
; GFX8-NEXT: s_lshl_b32 s1, s1, 8
; GFX8-NEXT: s_sub_i32 s8, 0x8000, s8
; GFX8-NEXT: s_sub_i32 s9, 0x7fff, s9
; GFX8-NEXT: s_sext_i32_i16 s8, s8
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sub_i32 s9, 0x7fff, s9
; GFX8-NEXT: s_max_i32 s1, s8, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s8, s9
; GFX8-NEXT: s_min_i32 s1, s1, s8
; GFX8-NEXT: s_add_i32 s0, s0, s1
Expand All @@ -801,11 +796,10 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX8-NEXT: s_max_i32 s8, s5, 0
; GFX8-NEXT: s_min_i32 s5, s5, 0
; GFX8-NEXT: s_sub_i32 s5, 0x8000, s5
; GFX8-NEXT: s_sub_i32 s8, 0x7fff, s8
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sub_i32 s8, 0x7fff, s8
; GFX8-NEXT: s_max_i32 s2, s5, s2
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s5, s8
; GFX8-NEXT: s_min_i32 s2, s2, s5
; GFX8-NEXT: s_add_i32 s1, s1, s2
Expand All @@ -815,11 +809,10 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX8-NEXT: s_max_i32 s6, s5, 0
; GFX8-NEXT: s_min_i32 s5, s5, 0
; GFX8-NEXT: s_sub_i32 s5, 0x8000, s5
; GFX8-NEXT: s_sub_i32 s6, 0x7fff, s6
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sub_i32 s6, 0x7fff, s6
; GFX8-NEXT: s_max_i32 s3, s5, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s5, s6
; GFX8-NEXT: s_min_i32 s3, s3, s5
; GFX8-NEXT: s_add_i32 s2, s2, s3
Expand All @@ -829,14 +822,13 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX8-NEXT: s_min_i32 s5, s5, 0
; GFX8-NEXT: s_lshl_b32 s4, s7, 8
; GFX8-NEXT: s_sub_i32 s5, 0x8000, s5
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sub_i32 s6, 0x7fff, s6
; GFX8-NEXT: s_max_i32 s4, s5, s4
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s0, s0
; GFX8-NEXT: s_ashr_i32 s1, s1, 8
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_max_i32 s4, s5, s4
; GFX8-NEXT: s_sext_i32_i16 s5, s6
; GFX8-NEXT: s_ashr_i32 s0, s0, 8
; GFX8-NEXT: s_sext_i32_i16 s2, s2
Expand Down Expand Up @@ -2631,11 +2623,10 @@ define amdgpu_ps i16 @s_saddsat_i16(i16 inreg %lhs, i16 inreg %rhs) {
; GFX8-NEXT: s_max_i32 s3, s2, 0
; GFX8-NEXT: s_min_i32 s2, s2, 0
; GFX8-NEXT: s_sub_i32 s2, 0x8000, s2
; GFX8-NEXT: s_sub_i32 s3, 0x7fff, s3
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sub_i32 s3, 0x7fff, s3
; GFX8-NEXT: s_max_i32 s1, s2, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s2, s3
; GFX8-NEXT: s_min_i32 s1, s1, s2
; GFX8-NEXT: s_add_i32 s0, s0, s1
Expand Down Expand Up @@ -2835,11 +2826,10 @@ define amdgpu_ps i32 @s_saddsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
; GFX8-NEXT: s_max_i32 s4, s3, 0
; GFX8-NEXT: s_min_i32 s3, s3, 0
; GFX8-NEXT: s_sub_i32 s3, 0x8000, s3
; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s5, s1
; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4
; GFX8-NEXT: s_max_i32 s3, s3, s5
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
; GFX8-NEXT: s_min_i32 s3, s3, s4
Expand Down Expand Up @@ -3190,11 +3180,10 @@ define amdgpu_ps <2 x i32> @s_saddsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
; GFX8-NEXT: s_max_i32 s7, s6, 0
; GFX8-NEXT: s_min_i32 s6, s6, 0
; GFX8-NEXT: s_sub_i32 s6, 0x8000, s6
; GFX8-NEXT: s_sub_i32 s7, 0x7fff, s7
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s8, s2
; GFX8-NEXT: s_sub_i32 s7, 0x7fff, s7
; GFX8-NEXT: s_max_i32 s6, s6, s8
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s7, s7
; GFX8-NEXT: s_lshr_b32 s4, s0, 16
; GFX8-NEXT: s_min_i32 s6, s6, s7
Expand All @@ -3215,11 +3204,10 @@ define amdgpu_ps <2 x i32> @s_saddsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
; GFX8-NEXT: s_max_i32 s6, s2, 0
; GFX8-NEXT: s_min_i32 s2, s2, 0
; GFX8-NEXT: s_sub_i32 s2, 0x8000, s2
; GFX8-NEXT: s_sub_i32 s6, 0x7fff, s6
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s7, s3
; GFX8-NEXT: s_sub_i32 s6, 0x7fff, s6
; GFX8-NEXT: s_max_i32 s2, s2, s7
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_lshr_b32 s5, s1, 16
; GFX8-NEXT: s_min_i32 s2, s2, s6
Expand Down Expand Up @@ -3513,11 +3501,10 @@ define amdgpu_ps <3 x i32> @s_saddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
; GFX8-NEXT: s_max_i32 s10, s9, 0
; GFX8-NEXT: s_min_i32 s9, s9, 0
; GFX8-NEXT: s_sub_i32 s9, 0x8000, s9
; GFX8-NEXT: s_sub_i32 s10, 0x7fff, s10
; GFX8-NEXT: s_sext_i32_i16 s9, s9
; GFX8-NEXT: s_sext_i32_i16 s11, s3
; GFX8-NEXT: s_sub_i32 s10, 0x7fff, s10
; GFX8-NEXT: s_max_i32 s9, s9, s11
; GFX8-NEXT: s_sext_i32_i16 s9, s9
; GFX8-NEXT: s_sext_i32_i16 s10, s10
; GFX8-NEXT: s_lshr_b32 s6, s0, 16
; GFX8-NEXT: s_min_i32 s9, s9, s10
Expand All @@ -3538,11 +3525,10 @@ define amdgpu_ps <3 x i32> @s_saddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
; GFX8-NEXT: s_max_i32 s9, s3, 0
; GFX8-NEXT: s_min_i32 s3, s3, 0
; GFX8-NEXT: s_sub_i32 s3, 0x8000, s3
; GFX8-NEXT: s_sub_i32 s9, 0x7fff, s9
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s10, s4
; GFX8-NEXT: s_sub_i32 s9, 0x7fff, s9
; GFX8-NEXT: s_max_i32 s3, s3, s10
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s9, s9
; GFX8-NEXT: s_lshr_b32 s7, s1, 16
; GFX8-NEXT: s_min_i32 s3, s3, s9
Expand All @@ -3563,11 +3549,10 @@ define amdgpu_ps <3 x i32> @s_saddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
; GFX8-NEXT: s_max_i32 s4, s3, 0
; GFX8-NEXT: s_min_i32 s3, s3, 0
; GFX8-NEXT: s_sub_i32 s3, 0x8000, s3
; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s9, s5
; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4
; GFX8-NEXT: s_max_i32 s3, s3, s9
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_lshr_b32 s8, s2, 16
; GFX8-NEXT: s_min_i32 s3, s3, s4
Expand Down Expand Up @@ -3924,11 +3909,10 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX8-NEXT: s_max_i32 s13, s12, 0
; GFX8-NEXT: s_min_i32 s12, s12, 0
; GFX8-NEXT: s_sub_i32 s12, 0x8000, s12
; GFX8-NEXT: s_sub_i32 s13, 0x7fff, s13
; GFX8-NEXT: s_sext_i32_i16 s12, s12
; GFX8-NEXT: s_sext_i32_i16 s14, s4
; GFX8-NEXT: s_sub_i32 s13, 0x7fff, s13
; GFX8-NEXT: s_max_i32 s12, s12, s14
; GFX8-NEXT: s_sext_i32_i16 s12, s12
; GFX8-NEXT: s_sext_i32_i16 s13, s13
; GFX8-NEXT: s_lshr_b32 s8, s0, 16
; GFX8-NEXT: s_min_i32 s12, s12, s13
Expand All @@ -3949,11 +3933,10 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX8-NEXT: s_max_i32 s12, s4, 0
; GFX8-NEXT: s_min_i32 s4, s4, 0
; GFX8-NEXT: s_sub_i32 s4, 0x8000, s4
; GFX8-NEXT: s_sub_i32 s12, 0x7fff, s12
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s13, s5
; GFX8-NEXT: s_sub_i32 s12, 0x7fff, s12
; GFX8-NEXT: s_max_i32 s4, s4, s13
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s12, s12
; GFX8-NEXT: s_lshr_b32 s9, s1, 16
; GFX8-NEXT: s_min_i32 s4, s4, s12
Expand All @@ -3974,11 +3957,10 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX8-NEXT: s_max_i32 s5, s4, 0
; GFX8-NEXT: s_min_i32 s4, s4, 0
; GFX8-NEXT: s_sub_i32 s4, 0x8000, s4
; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s12, s6
; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
; GFX8-NEXT: s_max_i32 s4, s4, s12
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_lshr_b32 s10, s2, 16
; GFX8-NEXT: s_min_i32 s4, s4, s5
Expand All @@ -3999,11 +3981,10 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX8-NEXT: s_max_i32 s5, s4, 0
; GFX8-NEXT: s_min_i32 s4, s4, 0
; GFX8-NEXT: s_sub_i32 s4, 0x8000, s4
; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s6, s7
; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
; GFX8-NEXT: s_max_i32 s4, s4, s6
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_lshr_b32 s11, s3, 16
; GFX8-NEXT: s_min_i32 s4, s4, s5
Expand Down
Loading
Loading