Skip to content

Commit 57e9aec

Browse files
AMDGPU/GlobalISel: Add tests for missing readanylane combines
1 parent c6461ff commit 57e9aec

File tree

2 files changed

+559
-0
lines changed

2 files changed

+559
-0
lines changed
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -new-reg-bank-select < %s | FileCheck %s
3+
4+
; Volatile float load through %ptr0 whose result is stored unchanged through
; %ptr1. The checks expect the loaded VGPR (v1) to feed global_store_dword
; directly, with no v_readfirstlane_b32 between the load and the store.
define amdgpu_ps void @readanylane_to_virtual_vgpr(ptr addrspace(1) inreg %ptr0, ptr addrspace(1) inreg %ptr1) {
5+
; CHECK-LABEL: readanylane_to_virtual_vgpr:
6+
; CHECK: ; %bb.0:
7+
; CHECK-NEXT: v_mov_b32_e32 v0, 0
8+
; CHECK-NEXT: global_load_dword v1, v0, s[0:1] glc dlc
9+
; CHECK-NEXT: s_waitcnt vmcnt(0)
10+
; CHECK-NEXT: global_store_dword v0, v1, s[2:3]
11+
; CHECK-NEXT: s_endpgm
12+
%load = load volatile float, ptr addrspace(1) %ptr0
13+
store float %load, ptr addrspace(1) %ptr1
14+
ret void
15+
}
16+
17+
; Volatile float load through %ptr, returned as the shader's float result.
; The checks currently record a v_readfirstlane_b32 into s0 followed by a
; v_mov_b32 back into v0 before the return.
; NOTE(review): presumably this VGPR -> SGPR -> VGPR round trip is one of the
; missing combines this test is meant to document - confirm.
define amdgpu_ps float @readanylane_to_physical_vgpr(ptr addrspace(1) inreg %ptr) {
18+
; CHECK-LABEL: readanylane_to_physical_vgpr:
19+
; CHECK: ; %bb.0:
20+
; CHECK-NEXT: v_mov_b32_e32 v0, 0
21+
; CHECK-NEXT: global_load_dword v0, v0, s[0:1] glc dlc
22+
; CHECK-NEXT: s_waitcnt vmcnt(0)
23+
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
24+
; CHECK-NEXT: v_mov_b32_e32 v0, s0
25+
; CHECK-NEXT: ; return to shader part epilog
26+
%load = load volatile float, ptr addrspace(1) %ptr
27+
ret float %load
28+
}
29+
30+
; Volatile <2 x i16> load through %ptr0, bitcast to i32, then stored through
; %ptr1. The checks currently record a v_readfirstlane_b32 into s0 and a
; v_mov_b32 back into v1 between the load and the store.
; NOTE(review): presumably the bitcast is what keeps the round trip from being
; folded away here - confirm.
define amdgpu_ps void @readanylane_to_bitcast_to_virtual_vgpr(ptr addrspace(1) inreg %ptr0, ptr addrspace(1) inreg %ptr1) {
31+
; CHECK-LABEL: readanylane_to_bitcast_to_virtual_vgpr:
32+
; CHECK: ; %bb.0:
33+
; CHECK-NEXT: v_mov_b32_e32 v0, 0
34+
; CHECK-NEXT: global_load_dword v1, v0, s[0:1] glc dlc
35+
; CHECK-NEXT: s_waitcnt vmcnt(0)
36+
; CHECK-NEXT: v_readfirstlane_b32 s0, v1
37+
; CHECK-NEXT: v_mov_b32_e32 v1, s0
38+
; CHECK-NEXT: global_store_dword v0, v1, s[2:3]
39+
; CHECK-NEXT: s_endpgm
40+
%load = load volatile <2 x i16>, ptr addrspace(1) %ptr0
41+
%bitcast = bitcast <2 x i16> %load to i32
42+
store i32 %bitcast, ptr addrspace(1) %ptr1
43+
ret void
44+
}
45+
46+
; Volatile <2 x i16> load through %ptr0, bitcast to float and returned as the
; shader's result. %ptr1 is not referenced in the body. The checks currently
; record a v_readfirstlane_b32 into s0 followed by a v_mov_b32 back into v0
; before the return.
define amdgpu_ps float @readanylane_to_bitcast_to_physical_vgpr(ptr addrspace(1) inreg %ptr0, ptr addrspace(1) inreg %ptr1) {
47+
; CHECK-LABEL: readanylane_to_bitcast_to_physical_vgpr:
48+
; CHECK: ; %bb.0:
49+
; CHECK-NEXT: v_mov_b32_e32 v0, 0
50+
; CHECK-NEXT: global_load_dword v0, v0, s[0:1] glc dlc
51+
; CHECK-NEXT: s_waitcnt vmcnt(0)
52+
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
53+
; CHECK-NEXT: v_mov_b32_e32 v0, s0
54+
; CHECK-NEXT: ; return to shader part epilog
55+
%load = load volatile <2 x i16>, ptr addrspace(1) %ptr0
56+
%bitcast = bitcast <2 x i16> %load to float
57+
ret float %bitcast
58+
}
59+
60+
; Volatile i64 load through %ptr0, stored unchanged through %ptr1. The checks
; currently record each 32-bit half of the loaded v[0:1] pair being read into
; s0/s1 with v_readfirstlane_b32 and then moved back into v0/v1 before the
; global_store_dwordx2.
define amdgpu_ps void @unmerge_readanylane_merge_to_virtual_vgpr(ptr addrspace(1) inreg %ptr0, ptr addrspace(1) inreg %ptr1) {
61+
; CHECK-LABEL: unmerge_readanylane_merge_to_virtual_vgpr:
62+
; CHECK: ; %bb.0:
63+
; CHECK-NEXT: v_mov_b32_e32 v2, 0
64+
; CHECK-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] glc dlc
65+
; CHECK-NEXT: s_waitcnt vmcnt(0)
66+
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
67+
; CHECK-NEXT: v_readfirstlane_b32 s1, v1
68+
; CHECK-NEXT: v_mov_b32_e32 v0, s0
69+
; CHECK-NEXT: v_mov_b32_e32 v1, s1
70+
; CHECK-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
71+
; CHECK-NEXT: s_endpgm
72+
%load = load volatile i64, ptr addrspace(1) %ptr0
73+
store i64 %load, ptr addrspace(1) %ptr1
74+
ret void
75+
}
76+
77+
;define amdgpu_ps double @unmerge_readanylane_merge_to_physical_vgpr(ptr addrspace(1) inreg %ptr0, ptr addrspace(1) inreg %ptr1) {
78+
; %load = load volatile double, ptr addrspace(1) %ptr0
79+
; ret double %load
80+
;}
81+
82+
; Volatile <2 x i32> load through %ptr0, bitcast to double, then stored
; through %ptr1. The checks currently record both 32-bit halves of v[0:1]
; going through v_readfirstlane_b32 (into s0/s1) and v_mov_b32 (back into
; v0/v1) before the global_store_dwordx2.
define amdgpu_ps void @unmerge_readanylane_merge_bitcast_to_virtual_vgpr(ptr addrspace(1) inreg %ptr0, ptr addrspace(1) inreg %ptr1) {
83+
; CHECK-LABEL: unmerge_readanylane_merge_bitcast_to_virtual_vgpr:
84+
; CHECK: ; %bb.0:
85+
; CHECK-NEXT: v_mov_b32_e32 v2, 0
86+
; CHECK-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] glc dlc
87+
; CHECK-NEXT: s_waitcnt vmcnt(0)
88+
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
89+
; CHECK-NEXT: v_readfirstlane_b32 s1, v1
90+
; CHECK-NEXT: v_mov_b32_e32 v0, s0
91+
; CHECK-NEXT: v_mov_b32_e32 v1, s1
92+
; CHECK-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
93+
; CHECK-NEXT: s_endpgm
94+
%load = load volatile <2 x i32>, ptr addrspace(1) %ptr0
95+
%bitcast = bitcast <2 x i32> %load to double
96+
store double %bitcast, ptr addrspace(1) %ptr1
97+
ret void
98+
}
99+
100+
;define amdgpu_ps double @unmerge_readanylane_merge_bitcast_to_physical_vgpr(ptr addrspace(1) inreg %ptr0, ptr addrspace(1) inreg %ptr1) {
101+
; %load = load volatile <2 x i32>, ptr addrspace(1) %ptr0
102+
; %bitcast = bitcast <2 x i32> %load to double
103+
; ret double %bitcast
104+
;}
105+
106+
; Volatile <2 x i32> load through %ptr0; only element 1 is extracted and
; stored through %ptr1 as an i32. The checks currently record that just the
; high register of the loaded pair (v1) is read with v_readfirstlane_b32 into
; s0 and moved back into v0 before the global_store_dword.
define amdgpu_ps void @unmerge_readanylane_merge_extract_to_virtual_vgpr(ptr addrspace(1) inreg %ptr0, ptr addrspace(1) inreg %ptr1) {
107+
; CHECK-LABEL: unmerge_readanylane_merge_extract_to_virtual_vgpr:
108+
; CHECK: ; %bb.0:
109+
; CHECK-NEXT: v_mov_b32_e32 v2, 0
110+
; CHECK-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] glc dlc
111+
; CHECK-NEXT: s_waitcnt vmcnt(0)
112+
; CHECK-NEXT: v_readfirstlane_b32 s0, v1
113+
; CHECK-NEXT: v_mov_b32_e32 v0, s0
114+
; CHECK-NEXT: global_store_dword v2, v0, s[2:3]
115+
; CHECK-NEXT: s_endpgm
116+
%load = load volatile <2 x i32>, ptr addrspace(1) %ptr0
117+
%extracted = extractelement <2 x i32> %load, i32 1
118+
store i32 %extracted, ptr addrspace(1) %ptr1
119+
ret void
120+
}
121+
122+
; Volatile <2 x float> load through %ptr0; element 1 is extracted and returned
; as the shader's float result. %ptr1 is not referenced in the body. The
; checks currently record v1 being read with v_readfirstlane_b32 into s0 and
; moved back into v0 before the return.
define amdgpu_ps float @unmerge_readanylane_merge_extract_to_physical_vgpr(ptr addrspace(1) inreg %ptr0, ptr addrspace(1) inreg %ptr1) {
123+
; CHECK-LABEL: unmerge_readanylane_merge_extract_to_physical_vgpr:
124+
; CHECK: ; %bb.0:
125+
; CHECK-NEXT: v_mov_b32_e32 v0, 0
126+
; CHECK-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc dlc
127+
; CHECK-NEXT: s_waitcnt vmcnt(0)
128+
; CHECK-NEXT: v_readfirstlane_b32 s0, v1
129+
; CHECK-NEXT: v_mov_b32_e32 v0, s0
130+
; CHECK-NEXT: ; return to shader part epilog
131+
%load = load volatile <2 x float>, ptr addrspace(1) %ptr0
132+
%extracted = extractelement <2 x float> %load, i32 1
133+
ret float %extracted
134+
}
135+
136+
; Volatile <4 x i16> load through %ptr0; a shufflevector keeps elements 0 and
; 1, the resulting <2 x i16> is bitcast to float and stored through %ptr1.
; The checks currently record only the low register of the loaded pair (v0)
; being read with v_readfirstlane_b32 into s0 and moved back into v0 before
; the global_store_dword.
define amdgpu_ps void @unmerge_readanylane_merge_extract_bitcast_to_virtual_vgpr(ptr addrspace(1) inreg %ptr0, ptr addrspace(1) inreg %ptr1) {
137+
; CHECK-LABEL: unmerge_readanylane_merge_extract_bitcast_to_virtual_vgpr:
138+
; CHECK: ; %bb.0:
139+
; CHECK-NEXT: v_mov_b32_e32 v2, 0
140+
; CHECK-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] glc dlc
141+
; CHECK-NEXT: s_waitcnt vmcnt(0)
142+
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
143+
; CHECK-NEXT: v_mov_b32_e32 v0, s0
144+
; CHECK-NEXT: global_store_dword v2, v0, s[2:3]
145+
; CHECK-NEXT: s_endpgm
146+
%load = load volatile <4 x i16>, ptr addrspace(1) %ptr0
147+
%extracted = shufflevector <4 x i16> %load, <4 x i16> %load, <2 x i32> <i32 0, i32 1>
148+
%bitcast = bitcast <2 x i16> %extracted to float
149+
store float %bitcast, ptr addrspace(1) %ptr1
150+
ret void
151+
}
152+
153+
; Volatile <4 x i16> load through %ptr0; a shufflevector keeps elements 0 and
; 1, and the resulting <2 x i16> is bitcast to float and returned as the
; shader's result. %ptr1 is not referenced in the body. The checks currently
; record v0 being read with v_readfirstlane_b32 into s0 and moved back into
; v0 before the return.
define amdgpu_ps float @unmerge_readanylane_merge_extract_bitcast_to_physical_vgpr(ptr addrspace(1) inreg %ptr0, ptr addrspace(1) inreg %ptr1) {
154+
; CHECK-LABEL: unmerge_readanylane_merge_extract_bitcast_to_physical_vgpr:
155+
; CHECK: ; %bb.0:
156+
; CHECK-NEXT: v_mov_b32_e32 v0, 0
157+
; CHECK-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc dlc
158+
; CHECK-NEXT: s_waitcnt vmcnt(0)
159+
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
160+
; CHECK-NEXT: v_mov_b32_e32 v0, s0
161+
; CHECK-NEXT: ; return to shader part epilog
162+
%load = load volatile <4 x i16>, ptr addrspace(1) %ptr0
163+
%extracted = shufflevector <4 x i16> %load, <4 x i16> %load, <2 x i32> <i32 0, i32 1>
164+
%bitcast = bitcast <2 x i16> %extracted to float
165+
ret float %bitcast
166+
}

0 commit comments

Comments
 (0)