@@ -125,6 +125,76 @@ define half @ld_global_v8f16(ptr addrspace(1) %ptr) {
125
125
ret half %sum
126
126
}
127
127
128
; Invariant load of a <2 x float> through an addrspace(1) pointer.
; Both lanes are extracted and added together; the expected PTX below is a
; single ld.global.nc.v2.f32 followed by one add.rn.f32.
define float @ld_global_v2f32(ptr addrspace(1) %ptr) {
; CHECK-LABEL: ld_global_v2f32(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<4>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [ld_global_v2f32_param_0];
; CHECK-NEXT: ld.global.nc.v2.f32 {%f1, %f2}, [%rd1];
; CHECK-NEXT: add.rn.f32 %f3, %f1, %f2;
; CHECK-NEXT: st.param.f32 [func_retval0], %f3;
; CHECK-NEXT: ret;
  ; The load carries !invariant.load metadata (node !0 is defined elsewhere in the file).
  %vec = load <2 x float>, ptr addrspace(1) %ptr, !invariant.load !0
  %lane0 = extractelement <2 x float> %vec, i32 0
  %lane1 = extractelement <2 x float> %vec, i32 1
  %total = fadd float %lane0, %lane1
  ret float %total
}
146
+
147
; Invariant load of a <4 x float> through an addrspace(1) pointer.
; All four lanes are extracted and reduced pairwise: (lane0 + lane1) + (lane2 + lane3).
; The expected PTX below is one ld.global.nc.v4.f32 followed by three add.rn.f32.
define float @ld_global_v4f32(ptr addrspace(1) %ptr) {
; CHECK-LABEL: ld_global_v4f32(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<8>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [ld_global_v4f32_param_0];
; CHECK-NEXT: ld.global.nc.v4.f32 {%f1, %f2, %f3, %f4}, [%rd1];
; CHECK-NEXT: add.rn.f32 %f5, %f1, %f2;
; CHECK-NEXT: add.rn.f32 %f6, %f3, %f4;
; CHECK-NEXT: add.rn.f32 %f7, %f5, %f6;
; CHECK-NEXT: st.param.f32 [func_retval0], %f7;
; CHECK-NEXT: ret;
  ; The load carries !invariant.load metadata (node !0 is defined elsewhere in the file).
  %vec = load <4 x float>, ptr addrspace(1) %ptr, !invariant.load !0
  %lane0 = extractelement <4 x float> %vec, i32 0
  %lane1 = extractelement <4 x float> %vec, i32 1
  %lane2 = extractelement <4 x float> %vec, i32 2
  %lane3 = extractelement <4 x float> %vec, i32 3
  ; Pairwise reduction: low pair, high pair, then combine.
  %lo_pair = fadd float %lane0, %lane1
  %hi_pair = fadd float %lane2, %lane3
  %total = fadd float %lo_pair, %hi_pair
  ret float %total
}
171
+
172
; Invariant load of an <8 x float> through an addrspace(1) pointer.
; Only the even lanes (0, 2, 4, 6) are extracted and reduced pairwise.
; The expected PTX below splits the access into two ld.global.nc.v4.f32
; instructions, one at [%rd1+16] and one at [%rd1].
define float @ld_global_v8f32(ptr addrspace(1) %ptr) {
; CHECK-LABEL: ld_global_v8f32(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<12>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [ld_global_v8f32_param_0];
; CHECK-NEXT: ld.global.nc.v4.f32 {%f1, %f2, %f3, %f4}, [%rd1+16];
; CHECK-NEXT: ld.global.nc.v4.f32 {%f5, %f6, %f7, %f8}, [%rd1];
; CHECK-NEXT: add.rn.f32 %f9, %f5, %f7;
; CHECK-NEXT: add.rn.f32 %f10, %f1, %f3;
; CHECK-NEXT: add.rn.f32 %f11, %f9, %f10;
; CHECK-NEXT: st.param.f32 [func_retval0], %f11;
; CHECK-NEXT: ret;
  ; The load carries !invariant.load metadata (node !0 is defined elsewhere in the file).
  %vec = load <8 x float>, ptr addrspace(1) %ptr, !invariant.load !0
  ; Even lanes only; the odd lanes of %vec are never read.
  %lane0 = extractelement <8 x float> %vec, i32 0
  %lane2 = extractelement <8 x float> %vec, i32 2
  %lane4 = extractelement <8 x float> %vec, i32 4
  %lane6 = extractelement <8 x float> %vec, i32 6
  ; (lane0 + lane2) + (lane4 + lane6)
  %lo_half = fadd float %lane0, %lane2
  %hi_half = fadd float %lane4, %lane6
  %total = fadd float %lo_half, %hi_half
  ret float %total
}
197
+
128
198
define i8 @ld_global_v8i8 (ptr addrspace (1 ) %ptr ) {
129
199
; CHECK-LABEL: ld_global_v8i8(
130
200
; CHECK: {
0 commit comments