Skip to content

Commit 40051c9

Browse files
committed
revert a316539 AMDGPU: Fix overly conservative immediate operand check
1 parent d92de5a commit 40051c9

17 files changed

+184
-85
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -821,8 +821,7 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
821821
if (UseOpIdx >= Desc.getNumOperands())
822822
return false;
823823

824-
// Filter out unhandled pseudos.
825-
if (!AMDGPU::isSISrcOperand(Desc, UseOpIdx))
824+
if (!AMDGPU::isSISrcInlinableOperand(Desc, UseOpIdx))
826825
return false;
827826

828827
uint8_t OpTy = Desc.operands()[UseOpIdx].OperandType;

llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -920,7 +920,9 @@ define amdgpu_ps i64 @s_andn2_v4i16(<4 x i16> inreg %src0, <4 x i16> inreg %src1
920920
; GFX6-NEXT: s_lshl_b32 s3, s9, 16
921921
; GFX6-NEXT: s_and_b32 s4, s8, 0xffff
922922
; GFX6-NEXT: s_or_b32 s3, s3, s4
923-
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], -1
923+
; GFX6-NEXT: s_mov_b32 s4, -1
924+
; GFX6-NEXT: s_mov_b32 s5, s4
925+
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
924926
; GFX6-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
925927
; GFX6-NEXT: ; return to shader part epilog
926928
;
@@ -960,7 +962,9 @@ define amdgpu_ps i64 @s_andn2_v4i16_commute(<4 x i16> inreg %src0, <4 x i16> inr
960962
; GFX6-NEXT: s_lshl_b32 s3, s9, 16
961963
; GFX6-NEXT: s_and_b32 s4, s8, 0xffff
962964
; GFX6-NEXT: s_or_b32 s3, s3, s4
963-
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], -1
965+
; GFX6-NEXT: s_mov_b32 s4, -1
966+
; GFX6-NEXT: s_mov_b32 s5, s4
967+
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
964968
; GFX6-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
965969
; GFX6-NEXT: ; return to shader part epilog
966970
;
@@ -1000,7 +1004,9 @@ define amdgpu_ps { i64, i64 } @s_andn2_v4i16_multi_use(<4 x i16> inreg %src0, <4
10001004
; GFX6-NEXT: s_lshl_b32 s3, s9, 16
10011005
; GFX6-NEXT: s_and_b32 s4, s8, 0xffff
10021006
; GFX6-NEXT: s_or_b32 s3, s3, s4
1003-
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], -1
1007+
; GFX6-NEXT: s_mov_b32 s4, -1
1008+
; GFX6-NEXT: s_mov_b32 s5, s4
1009+
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
10041010
; GFX6-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
10051011
; GFX6-NEXT: ; return to shader part epilog
10061012
;
@@ -1054,7 +1060,9 @@ define amdgpu_ps { i64, i64 } @s_andn2_v4i16_multi_foldable_use(<4 x i16> inreg
10541060
; GFX6-NEXT: s_lshl_b32 s5, s13, 16
10551061
; GFX6-NEXT: s_and_b32 s6, s12, 0xffff
10561062
; GFX6-NEXT: s_or_b32 s5, s5, s6
1057-
; GFX6-NEXT: s_xor_b64 s[4:5], s[4:5], -1
1063+
; GFX6-NEXT: s_mov_b32 s6, -1
1064+
; GFX6-NEXT: s_mov_b32 s7, s6
1065+
; GFX6-NEXT: s_xor_b64 s[4:5], s[4:5], s[6:7]
10581066
; GFX6-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5]
10591067
; GFX6-NEXT: s_and_b64 s[2:3], s[2:3], s[4:5]
10601068
; GFX6-NEXT: ; return to shader part epilog

llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll

Lines changed: 44 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1769,8 +1769,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
17691769
; GFX9-NEXT: s_mov_b32 s0, 0
17701770
; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:4
17711771
; GFX9-NEXT: s_waitcnt vmcnt(0)
1772+
; GFX9-NEXT: s_movk_i32 s0, 0x3e80
17721773
; GFX9-NEXT: v_mov_b32_e32 v0, 15
1773-
; GFX9-NEXT: s_movk_i32 s0, 0x3e84
1774+
; GFX9-NEXT: s_add_i32 s0, s0, 4
17741775
; GFX9-NEXT: scratch_store_dword off, v0, s0
17751776
; GFX9-NEXT: s_waitcnt vmcnt(0)
17761777
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -1785,7 +1786,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
17851786
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
17861787
; GFX10-NEXT: v_mov_b32_e32 v0, 13
17871788
; GFX10-NEXT: v_mov_b32_e32 v1, 15
1788-
; GFX10-NEXT: s_movk_i32 s0, 0x3e84
1789+
; GFX10-NEXT: s_movk_i32 s0, 0x3e80
1790+
; GFX10-NEXT: s_add_i32 s0, s0, 4
17891791
; GFX10-NEXT: scratch_store_dword off, v0, off offset:4
17901792
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
17911793
; GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -1797,10 +1799,11 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
17971799
; GFX942-LABEL: store_load_large_imm_offset_kernel:
17981800
; GFX942: ; %bb.0: ; %bb
17991801
; GFX942-NEXT: v_mov_b32_e32 v0, 13
1802+
; GFX942-NEXT: s_movk_i32 s0, 0x3e80
18001803
; GFX942-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1
18011804
; GFX942-NEXT: s_waitcnt vmcnt(0)
18021805
; GFX942-NEXT: v_mov_b32_e32 v0, 15
1803-
; GFX942-NEXT: s_movk_i32 s0, 0x3e84
1806+
; GFX942-NEXT: s_add_i32 s0, s0, 4
18041807
; GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
18051808
; GFX942-NEXT: s_waitcnt vmcnt(0)
18061809
; GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -1810,7 +1813,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
18101813
; GFX11-LABEL: store_load_large_imm_offset_kernel:
18111814
; GFX11: ; %bb.0: ; %bb
18121815
; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
1813-
; GFX11-NEXT: s_movk_i32 s0, 0x3e84
1816+
; GFX11-NEXT: s_movk_i32 s0, 0x3e80
1817+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1818+
; GFX11-NEXT: s_add_i32 s0, s0, 4
18141819
; GFX11-NEXT: scratch_store_b32 off, v0, off offset:4 dlc
18151820
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
18161821
; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
@@ -1838,8 +1843,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
18381843
; UNALIGNED_GFX9-NEXT: s_mov_b32 s0, 0
18391844
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0 offset:4
18401845
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
1846+
; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e80
18411847
; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 15
1842-
; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e84
1848+
; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s0, 4
18431849
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0
18441850
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
18451851
; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -1854,7 +1860,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
18541860
; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
18551861
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v0, 13
18561862
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
1857-
; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e84
1863+
; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e80
1864+
; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s0, 4
18581865
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, off offset:4
18591866
; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
18601867
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -1866,10 +1873,11 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
18661873
; UNALIGNED_GFX942-LABEL: store_load_large_imm_offset_kernel:
18671874
; UNALIGNED_GFX942: ; %bb.0: ; %bb
18681875
; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 13
1876+
; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e80
18691877
; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1
18701878
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
18711879
; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 15
1872-
; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e84
1880+
; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s0, 4
18731881
; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
18741882
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
18751883
; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -1879,7 +1887,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
18791887
; UNALIGNED_GFX11-LABEL: store_load_large_imm_offset_kernel:
18801888
; UNALIGNED_GFX11: ; %bb.0: ; %bb
18811889
; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
1882-
; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e84
1890+
; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e80
1891+
; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1892+
; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s0, 4
18831893
; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v0, off offset:4 dlc
18841894
; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
18851895
; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
@@ -1913,11 +1923,13 @@ define void @store_load_large_imm_offset_foo() {
19131923
; GFX9-LABEL: store_load_large_imm_offset_foo:
19141924
; GFX9: ; %bb.0: ; %bb
19151925
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1926+
; GFX9-NEXT: s_movk_i32 s0, 0x3e80
19161927
; GFX9-NEXT: v_mov_b32_e32 v0, 13
1928+
; GFX9-NEXT: s_add_i32 s1, s32, s0
19171929
; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
19181930
; GFX9-NEXT: s_waitcnt vmcnt(0)
19191931
; GFX9-NEXT: v_mov_b32_e32 v0, 15
1920-
; GFX9-NEXT: s_add_i32 s0, s32, 0x3e84
1932+
; GFX9-NEXT: s_add_i32 s0, s1, 4
19211933
; GFX9-NEXT: scratch_store_dword off, v0, s0
19221934
; GFX9-NEXT: s_waitcnt vmcnt(0)
19231935
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -1928,8 +1940,10 @@ define void @store_load_large_imm_offset_foo() {
19281940
; GFX10: ; %bb.0: ; %bb
19291941
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19301942
; GFX10-NEXT: v_mov_b32_e32 v0, 13
1943+
; GFX10-NEXT: s_movk_i32 s0, 0x3e80
19311944
; GFX10-NEXT: v_mov_b32_e32 v1, 15
1932-
; GFX10-NEXT: s_add_i32 s0, s32, 0x3e84
1945+
; GFX10-NEXT: s_add_i32 s1, s32, s0
1946+
; GFX10-NEXT: s_add_i32 s0, s1, 4
19331947
; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
19341948
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
19351949
; GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -1941,11 +1955,13 @@ define void @store_load_large_imm_offset_foo() {
19411955
; GFX942-LABEL: store_load_large_imm_offset_foo:
19421956
; GFX942: ; %bb.0: ; %bb
19431957
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1958+
; GFX942-NEXT: s_movk_i32 s0, 0x3e80
19441959
; GFX942-NEXT: v_mov_b32_e32 v0, 13
1960+
; GFX942-NEXT: s_add_i32 s1, s32, s0
19451961
; GFX942-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1
19461962
; GFX942-NEXT: s_waitcnt vmcnt(0)
19471963
; GFX942-NEXT: v_mov_b32_e32 v0, 15
1948-
; GFX942-NEXT: s_add_i32 s0, s32, 0x3e84
1964+
; GFX942-NEXT: s_add_i32 s0, s1, 4
19491965
; GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
19501966
; GFX942-NEXT: s_waitcnt vmcnt(0)
19511967
; GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -1956,7 +1972,10 @@ define void @store_load_large_imm_offset_foo() {
19561972
; GFX11: ; %bb.0: ; %bb
19571973
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19581974
; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
1959-
; GFX11-NEXT: s_add_i32 s0, s32, 0x3e84
1975+
; GFX11-NEXT: s_movk_i32 s0, 0x3e80
1976+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
1977+
; GFX11-NEXT: s_add_i32 s1, s32, s0
1978+
; GFX11-NEXT: s_add_i32 s0, s1, 4
19601979
; GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc
19611980
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
19621981
; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
@@ -1985,11 +2004,13 @@ define void @store_load_large_imm_offset_foo() {
19852004
; UNALIGNED_GFX9-LABEL: store_load_large_imm_offset_foo:
19862005
; UNALIGNED_GFX9: ; %bb.0: ; %bb
19872006
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2007+
; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e80
19882008
; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 13
2009+
; UNALIGNED_GFX9-NEXT: s_add_i32 s1, s32, s0
19892010
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
19902011
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
19912012
; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 15
1992-
; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s32, 0x3e84
2013+
; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s1, 4
19932014
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0
19942015
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
19952016
; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -2000,8 +2021,10 @@ define void @store_load_large_imm_offset_foo() {
20002021
; UNALIGNED_GFX10: ; %bb.0: ; %bb
20012022
; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20022023
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v0, 13
2024+
; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e80
20032025
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
2004-
; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s32, 0x3e84
2026+
; UNALIGNED_GFX10-NEXT: s_add_i32 s1, s32, s0
2027+
; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s1, 4
20052028
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
20062029
; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
20072030
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -2013,11 +2036,13 @@ define void @store_load_large_imm_offset_foo() {
20132036
; UNALIGNED_GFX942-LABEL: store_load_large_imm_offset_foo:
20142037
; UNALIGNED_GFX942: ; %bb.0: ; %bb
20152038
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2039+
; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e80
20162040
; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 13
2041+
; UNALIGNED_GFX942-NEXT: s_add_i32 s1, s32, s0
20172042
; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1
20182043
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
20192044
; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 15
2020-
; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s32, 0x3e84
2045+
; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s1, 4
20212046
; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
20222047
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
20232048
; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -2028,7 +2053,10 @@ define void @store_load_large_imm_offset_foo() {
20282053
; UNALIGNED_GFX11: ; %bb.0: ; %bb
20292054
; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20302055
; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
2031-
; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s32, 0x3e84
2056+
; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e80
2057+
; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
2058+
; UNALIGNED_GFX11-NEXT: s_add_i32 s1, s32, s0
2059+
; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s1, 4
20322060
; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc
20332061
; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
20342062
; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc

llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -919,7 +919,9 @@ define amdgpu_ps i64 @s_orn2_v4i16(<4 x i16> inreg %src0, <4 x i16> inreg %src1)
919919
; GFX6-NEXT: s_lshl_b32 s3, s9, 16
920920
; GFX6-NEXT: s_and_b32 s4, s8, 0xffff
921921
; GFX6-NEXT: s_or_b32 s3, s3, s4
922-
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], -1
922+
; GFX6-NEXT: s_mov_b32 s4, -1
923+
; GFX6-NEXT: s_mov_b32 s5, s4
924+
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
923925
; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
924926
; GFX6-NEXT: ; return to shader part epilog
925927
;
@@ -959,7 +961,9 @@ define amdgpu_ps i64 @s_orn2_v4i16_commute(<4 x i16> inreg %src0, <4 x i16> inre
959961
; GFX6-NEXT: s_lshl_b32 s3, s9, 16
960962
; GFX6-NEXT: s_and_b32 s4, s8, 0xffff
961963
; GFX6-NEXT: s_or_b32 s3, s3, s4
962-
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], -1
964+
; GFX6-NEXT: s_mov_b32 s4, -1
965+
; GFX6-NEXT: s_mov_b32 s5, s4
966+
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
963967
; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
964968
; GFX6-NEXT: ; return to shader part epilog
965969
;
@@ -999,7 +1003,9 @@ define amdgpu_ps { i64, i64 } @s_orn2_v4i16_multi_use(<4 x i16> inreg %src0, <4
9991003
; GFX6-NEXT: s_lshl_b32 s3, s9, 16
10001004
; GFX6-NEXT: s_and_b32 s4, s8, 0xffff
10011005
; GFX6-NEXT: s_or_b32 s3, s3, s4
1002-
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], -1
1006+
; GFX6-NEXT: s_mov_b32 s4, -1
1007+
; GFX6-NEXT: s_mov_b32 s5, s4
1008+
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
10031009
; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
10041010
; GFX6-NEXT: ; return to shader part epilog
10051011
;
@@ -1053,7 +1059,9 @@ define amdgpu_ps { i64, i64 } @s_orn2_v4i16_multi_foldable_use(<4 x i16> inreg %
10531059
; GFX6-NEXT: s_lshl_b32 s5, s13, 16
10541060
; GFX6-NEXT: s_and_b32 s6, s12, 0xffff
10551061
; GFX6-NEXT: s_or_b32 s5, s5, s6
1056-
; GFX6-NEXT: s_xor_b64 s[4:5], s[4:5], -1
1062+
; GFX6-NEXT: s_mov_b32 s6, -1
1063+
; GFX6-NEXT: s_mov_b32 s7, s6
1064+
; GFX6-NEXT: s_xor_b64 s[4:5], s[4:5], s[6:7]
10571065
; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
10581066
; GFX6-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
10591067
; GFX6-NEXT: ; return to shader part epilog

llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -118,11 +118,13 @@ define amdgpu_ps i64 @scalar_xnor_v4i16_one_use(<4 x i16> inreg %a, <4 x i16> in
118118
; GFX7-NEXT: s_xor_b64 s[2:3], s[2:3], s[6:7]
119119
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
120120
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
121+
; GFX7-NEXT: s_mov_b32 s8, -1
121122
; GFX7-NEXT: s_or_b32 s0, s1, s0
122123
; GFX7-NEXT: s_lshl_b32 s1, s3, 16
123124
; GFX7-NEXT: s_and_b32 s2, s2, 0xffff
125+
; GFX7-NEXT: s_mov_b32 s9, s8
124126
; GFX7-NEXT: s_or_b32 s1, s1, s2
125-
; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], -1
127+
; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[8:9]
126128
; GFX7-NEXT: ; return to shader part epilog
127129
;
128130
; GFX8-LABEL: scalar_xnor_v4i16_one_use:

llvm/test/CodeGen/AMDGPU/bug-cselect-b64.ll

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,14 +5,16 @@ define amdgpu_cs <2 x i32> @f() {
55
; CHECK-LABEL: f:
66
; CHECK: ; %bb.0: ; %bb
77
; CHECK-NEXT: s_mov_b32 s4, 0
8-
; CHECK-NEXT: s_mov_b32 s1, 0
98
; CHECK-NEXT: s_mov_b32 s5, s4
109
; CHECK-NEXT: s_mov_b32 s6, s4
1110
; CHECK-NEXT: s_mov_b32 s7, s4
11+
; CHECK-NEXT: s_mov_b32 s0, s4
1212
; CHECK-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
13+
; CHECK-NEXT: s_mov_b32 s1, s4
1314
; CHECK-NEXT: s_waitcnt vmcnt(0)
14-
; CHECK-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[0:1]
15+
; CHECK-NEXT: v_cmp_ne_u64_e32 vcc_lo, s[0:1], v[0:1]
1516
; CHECK-NEXT: v_mov_b32_e32 v1, s4
17+
; CHECK-NEXT: s_mov_b32 s1, 0
1618
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1719
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
1820
; CHECK-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0

llvm/test/CodeGen/AMDGPU/constrained-shift.ll

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -192,8 +192,10 @@ define amdgpu_ps <4 x i32> @s_csh_v4i32(<4 x i32> inreg %a, <4 x i32> inreg %b)
192192
;
193193
; GISEL-LABEL: s_csh_v4i32:
194194
; GISEL: ; %bb.0:
195-
; GISEL-NEXT: s_and_b64 s[4:5], s[4:5], 31
196-
; GISEL-NEXT: s_and_b64 s[6:7], s[6:7], 31
195+
; GISEL-NEXT: s_mov_b32 s8, 31
196+
; GISEL-NEXT: s_mov_b32 s9, s8
197+
; GISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9]
198+
; GISEL-NEXT: s_and_b64 s[6:7], s[6:7], s[8:9]
197199
; GISEL-NEXT: s_lshl_b32 s8, s0, s4
198200
; GISEL-NEXT: s_lshl_b32 s9, s1, s5
199201
; GISEL-NEXT: s_lshl_b32 s10, s2, s6

0 commit comments

Comments
 (0)