@@ -1769,8 +1769,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
1769
1769
; GFX9-NEXT: s_mov_b32 s0, 0
1770
1770
; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:4
1771
1771
; GFX9-NEXT: s_waitcnt vmcnt(0)
1772
+ ; GFX9-NEXT: s_movk_i32 s0, 0x3e80
1772
1773
; GFX9-NEXT: v_mov_b32_e32 v0, 15
1773
- ; GFX9-NEXT: s_movk_i32 s0, 0x3e84
1774
+ ; GFX9-NEXT: s_add_i32 s0, s0, 4
1774
1775
; GFX9-NEXT: scratch_store_dword off, v0, s0
1775
1776
; GFX9-NEXT: s_waitcnt vmcnt(0)
1776
1777
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -1785,7 +1786,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
1785
1786
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
1786
1787
; GFX10-NEXT: v_mov_b32_e32 v0, 13
1787
1788
; GFX10-NEXT: v_mov_b32_e32 v1, 15
1788
- ; GFX10-NEXT: s_movk_i32 s0, 0x3e84
1789
+ ; GFX10-NEXT: s_movk_i32 s0, 0x3e80
1790
+ ; GFX10-NEXT: s_add_i32 s0, s0, 4
1789
1791
; GFX10-NEXT: scratch_store_dword off, v0, off offset:4
1790
1792
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1791
1793
; GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -1797,10 +1799,11 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
1797
1799
; GFX942-LABEL: store_load_large_imm_offset_kernel:
1798
1800
; GFX942: ; %bb.0: ; %bb
1799
1801
; GFX942-NEXT: v_mov_b32_e32 v0, 13
1802
+ ; GFX942-NEXT: s_movk_i32 s0, 0x3e80
1800
1803
; GFX942-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1
1801
1804
; GFX942-NEXT: s_waitcnt vmcnt(0)
1802
1805
; GFX942-NEXT: v_mov_b32_e32 v0, 15
1803
- ; GFX942-NEXT: s_movk_i32 s0, 0x3e84
1806
+ ; GFX942-NEXT: s_add_i32 s0, s0, 4
1804
1807
; GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
1805
1808
; GFX942-NEXT: s_waitcnt vmcnt(0)
1806
1809
; GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -1810,7 +1813,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
1810
1813
; GFX11-LABEL: store_load_large_imm_offset_kernel:
1811
1814
; GFX11: ; %bb.0: ; %bb
1812
1815
; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
1813
- ; GFX11-NEXT: s_movk_i32 s0, 0x3e84
1816
+ ; GFX11-NEXT: s_movk_i32 s0, 0x3e80
1817
+ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1818
+ ; GFX11-NEXT: s_add_i32 s0, s0, 4
1814
1819
; GFX11-NEXT: scratch_store_b32 off, v0, off offset:4 dlc
1815
1820
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1816
1821
; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
@@ -1838,8 +1843,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
1838
1843
; UNALIGNED_GFX9-NEXT: s_mov_b32 s0, 0
1839
1844
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0 offset:4
1840
1845
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
1846
+ ; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e80
1841
1847
; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 15
1842
- ; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e84
1848
+ ; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s0, 4
1843
1849
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0
1844
1850
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
1845
1851
; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -1854,7 +1860,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
1854
1860
; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
1855
1861
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v0, 13
1856
1862
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
1857
- ; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e84
1863
+ ; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e80
1864
+ ; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s0, 4
1858
1865
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, off offset:4
1859
1866
; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1860
1867
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -1866,10 +1873,11 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
1866
1873
; UNALIGNED_GFX942-LABEL: store_load_large_imm_offset_kernel:
1867
1874
; UNALIGNED_GFX942: ; %bb.0: ; %bb
1868
1875
; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 13
1876
+ ; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e80
1869
1877
; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1
1870
1878
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
1871
1879
; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 15
1872
- ; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e84
1880
+ ; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s0, 4
1873
1881
; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
1874
1882
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
1875
1883
; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -1879,7 +1887,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
1879
1887
; UNALIGNED_GFX11-LABEL: store_load_large_imm_offset_kernel:
1880
1888
; UNALIGNED_GFX11: ; %bb.0: ; %bb
1881
1889
; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
1882
- ; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e84
1890
+ ; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e80
1891
+ ; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1892
+ ; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s0, 4
1883
1893
; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v0, off offset:4 dlc
1884
1894
; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1885
1895
; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
@@ -1913,11 +1923,13 @@ define void @store_load_large_imm_offset_foo() {
1913
1923
; GFX9-LABEL: store_load_large_imm_offset_foo:
1914
1924
; GFX9: ; %bb.0: ; %bb
1915
1925
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1926
+ ; GFX9-NEXT: s_movk_i32 s0, 0x3e80
1916
1927
; GFX9-NEXT: v_mov_b32_e32 v0, 13
1928
+ ; GFX9-NEXT: s_add_i32 s1, s32, s0
1917
1929
; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
1918
1930
; GFX9-NEXT: s_waitcnt vmcnt(0)
1919
1931
; GFX9-NEXT: v_mov_b32_e32 v0, 15
1920
- ; GFX9-NEXT: s_add_i32 s0, s32, 0x3e84
1932
+ ; GFX9-NEXT: s_add_i32 s0, s1, 4
1921
1933
; GFX9-NEXT: scratch_store_dword off, v0, s0
1922
1934
; GFX9-NEXT: s_waitcnt vmcnt(0)
1923
1935
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -1928,8 +1940,10 @@ define void @store_load_large_imm_offset_foo() {
1928
1940
; GFX10: ; %bb.0: ; %bb
1929
1941
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1930
1942
; GFX10-NEXT: v_mov_b32_e32 v0, 13
1943
+ ; GFX10-NEXT: s_movk_i32 s0, 0x3e80
1931
1944
; GFX10-NEXT: v_mov_b32_e32 v1, 15
1932
- ; GFX10-NEXT: s_add_i32 s0, s32, 0x3e84
1945
+ ; GFX10-NEXT: s_add_i32 s1, s32, s0
1946
+ ; GFX10-NEXT: s_add_i32 s0, s1, 4
1933
1947
; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
1934
1948
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1935
1949
; GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -1941,11 +1955,13 @@ define void @store_load_large_imm_offset_foo() {
1941
1955
; GFX942-LABEL: store_load_large_imm_offset_foo:
1942
1956
; GFX942: ; %bb.0: ; %bb
1943
1957
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1958
+ ; GFX942-NEXT: s_movk_i32 s0, 0x3e80
1944
1959
; GFX942-NEXT: v_mov_b32_e32 v0, 13
1960
+ ; GFX942-NEXT: s_add_i32 s1, s32, s0
1945
1961
; GFX942-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1
1946
1962
; GFX942-NEXT: s_waitcnt vmcnt(0)
1947
1963
; GFX942-NEXT: v_mov_b32_e32 v0, 15
1948
- ; GFX942-NEXT: s_add_i32 s0, s32, 0x3e84
1964
+ ; GFX942-NEXT: s_add_i32 s0, s1, 4
1949
1965
; GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
1950
1966
; GFX942-NEXT: s_waitcnt vmcnt(0)
1951
1967
; GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -1956,7 +1972,10 @@ define void @store_load_large_imm_offset_foo() {
1956
1972
; GFX11: ; %bb.0: ; %bb
1957
1973
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1958
1974
; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
1959
- ; GFX11-NEXT: s_add_i32 s0, s32, 0x3e84
1975
+ ; GFX11-NEXT: s_movk_i32 s0, 0x3e80
1976
+ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
1977
+ ; GFX11-NEXT: s_add_i32 s1, s32, s0
1978
+ ; GFX11-NEXT: s_add_i32 s0, s1, 4
1960
1979
; GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc
1961
1980
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1962
1981
; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
@@ -1985,11 +2004,13 @@ define void @store_load_large_imm_offset_foo() {
1985
2004
; UNALIGNED_GFX9-LABEL: store_load_large_imm_offset_foo:
1986
2005
; UNALIGNED_GFX9: ; %bb.0: ; %bb
1987
2006
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2007
+ ; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e80
1988
2008
; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 13
2009
+ ; UNALIGNED_GFX9-NEXT: s_add_i32 s1, s32, s0
1989
2010
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
1990
2011
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
1991
2012
; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 15
1992
- ; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s32, 0x3e84
2013
+ ; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s1, 4
1993
2014
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0
1994
2015
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
1995
2016
; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -2000,8 +2021,10 @@ define void @store_load_large_imm_offset_foo() {
2000
2021
; UNALIGNED_GFX10: ; %bb.0: ; %bb
2001
2022
; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2002
2023
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v0, 13
2024
+ ; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e80
2003
2025
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
2004
- ; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s32, 0x3e84
2026
+ ; UNALIGNED_GFX10-NEXT: s_add_i32 s1, s32, s0
2027
+ ; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s1, 4
2005
2028
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
2006
2029
; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2007
2030
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -2013,11 +2036,13 @@ define void @store_load_large_imm_offset_foo() {
2013
2036
; UNALIGNED_GFX942-LABEL: store_load_large_imm_offset_foo:
2014
2037
; UNALIGNED_GFX942: ; %bb.0: ; %bb
2015
2038
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2039
+ ; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e80
2016
2040
; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 13
2041
+ ; UNALIGNED_GFX942-NEXT: s_add_i32 s1, s32, s0
2017
2042
; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1
2018
2043
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
2019
2044
; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 15
2020
- ; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s32, 0x3e84
2045
+ ; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s1, 4
2021
2046
; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
2022
2047
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
2023
2048
; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -2028,7 +2053,10 @@ define void @store_load_large_imm_offset_foo() {
2028
2053
; UNALIGNED_GFX11: ; %bb.0: ; %bb
2029
2054
; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2030
2055
; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
2031
- ; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s32, 0x3e84
2056
+ ; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e80
2057
+ ; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
2058
+ ; UNALIGNED_GFX11-NEXT: s_add_i32 s1, s32, s0
2059
+ ; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s1, 4
2032
2060
; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc
2033
2061
; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2034
2062
; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
0 commit comments