Skip to content

Commit ff96732

Browse files
committed
[NVPTX] add coverage for v2f32 in ldg-invariant and fp-contract
For fp-contract: test folding of fma.f32x2 and bump the SM version to 100. For ldg-invariant: test proper splitting of loads on vectors of f32.
1 parent b4c7b0f commit ff96732

File tree

1 file changed

+70
-0
lines changed

1 file changed

+70
-0
lines changed

llvm/test/CodeGen/NVPTX/ldg-invariant.ll

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,76 @@ define half @ld_global_v8f16(ptr addrspace(1) %ptr) {
125125
ret half %sum
126126
}
127127

128+
; Test: an !invariant.load of <2 x float> through an addrspace(1) pointer.
; Both lanes are extracted and summed. The FileCheck assertions below expect
; the load to be emitted as a single ld.global.nc.v2.f32 followed by one
; add.rn.f32 whose result is stored to the return slot.
define float @ld_global_v2f32(ptr addrspace(1) %ptr) {
129+
; CHECK-LABEL: ld_global_v2f32(
130+
; CHECK: {
131+
; CHECK-NEXT: .reg .f32 %f<4>;
132+
; CHECK-NEXT: .reg .b64 %rd<2>;
133+
; CHECK-EMPTY:
134+
; CHECK-NEXT: // %bb.0:
135+
; CHECK-NEXT: ld.param.u64 %rd1, [ld_global_v2f32_param_0];
136+
; CHECK-NEXT: ld.global.nc.v2.f32 {%f1, %f2}, [%rd1];
137+
; CHECK-NEXT: add.rn.f32 %f3, %f1, %f2;
138+
; CHECK-NEXT: st.param.f32 [func_retval0], %f3;
139+
; CHECK-NEXT: ret;
140+
; Whole-vector load tagged with !invariant.load; lanes 0 and 1 are consumed.
%a = load <2 x float>, ptr addrspace(1) %ptr, !invariant.load !0
141+
%v1 = extractelement <2 x float> %a, i32 0
142+
%v2 = extractelement <2 x float> %a, i32 1
143+
%sum = fadd float %v1, %v2
144+
ret float %sum
145+
}
146+
147+
; Test: an !invariant.load of <4 x float> through an addrspace(1) pointer.
; All four lanes are extracted and reduced as (v1 + v2) + (v3 + v4). The
; FileCheck assertions below expect one ld.global.nc.v4.f32 covering the whole
; vector, followed by three add.rn.f32 instructions.
define float @ld_global_v4f32(ptr addrspace(1) %ptr) {
148+
; CHECK-LABEL: ld_global_v4f32(
149+
; CHECK: {
150+
; CHECK-NEXT: .reg .f32 %f<8>;
151+
; CHECK-NEXT: .reg .b64 %rd<2>;
152+
; CHECK-EMPTY:
153+
; CHECK-NEXT: // %bb.0:
154+
; CHECK-NEXT: ld.param.u64 %rd1, [ld_global_v4f32_param_0];
155+
; CHECK-NEXT: ld.global.nc.v4.f32 {%f1, %f2, %f3, %f4}, [%rd1];
156+
; CHECK-NEXT: add.rn.f32 %f5, %f1, %f2;
157+
; CHECK-NEXT: add.rn.f32 %f6, %f3, %f4;
158+
; CHECK-NEXT: add.rn.f32 %f7, %f5, %f6;
159+
; CHECK-NEXT: st.param.f32 [func_retval0], %f7;
160+
; CHECK-NEXT: ret;
161+
; Whole-vector load tagged with !invariant.load; every lane is consumed.
%a = load <4 x float>, ptr addrspace(1) %ptr, !invariant.load !0
162+
%v1 = extractelement <4 x float> %a, i32 0
163+
%v2 = extractelement <4 x float> %a, i32 1
164+
%v3 = extractelement <4 x float> %a, i32 2
165+
%v4 = extractelement <4 x float> %a, i32 3
166+
; Pairwise reduction: two independent partial sums, then the final sum.
%sum1 = fadd float %v1, %v2
167+
%sum2 = fadd float %v3, %v4
168+
%sum = fadd float %sum1, %sum2
169+
ret float %sum
170+
}
171+
172+
; Test: an !invariant.load of <8 x float> through an addrspace(1) pointer.
; The FileCheck assertions below expect the 8-element load to be split into two
; ld.global.nc.v4.f32 instructions, one at [%rd1+16] and one at [%rd1].
; Only the even lanes (0, 2, 4, 6) are extracted and summed, so the expected
; adds read just the first and third register of each loaded group.
define float @ld_global_v8f32(ptr addrspace(1) %ptr) {
173+
; CHECK-LABEL: ld_global_v8f32(
174+
; CHECK: {
175+
; CHECK-NEXT: .reg .f32 %f<12>;
176+
; CHECK-NEXT: .reg .b64 %rd<2>;
177+
; CHECK-EMPTY:
178+
; CHECK-NEXT: // %bb.0:
179+
; CHECK-NEXT: ld.param.u64 %rd1, [ld_global_v8f32_param_0];
180+
; CHECK-NEXT: ld.global.nc.v4.f32 {%f1, %f2, %f3, %f4}, [%rd1+16];
181+
; CHECK-NEXT: ld.global.nc.v4.f32 {%f5, %f6, %f7, %f8}, [%rd1];
182+
; CHECK-NEXT: add.rn.f32 %f9, %f5, %f7;
183+
; CHECK-NEXT: add.rn.f32 %f10, %f1, %f3;
184+
; CHECK-NEXT: add.rn.f32 %f11, %f9, %f10;
185+
; CHECK-NEXT: st.param.f32 [func_retval0], %f11;
186+
; CHECK-NEXT: ret;
187+
; Whole-vector load tagged with !invariant.load.
%a = load <8 x float>, ptr addrspace(1) %ptr, !invariant.load !0
188+
; Only even-indexed lanes are used: 0 and 2, then 4 and 6.
%v1 = extractelement <8 x float> %a, i32 0
189+
%v2 = extractelement <8 x float> %a, i32 2
190+
%v3 = extractelement <8 x float> %a, i32 4
191+
%v4 = extractelement <8 x float> %a, i32 6
192+
; Pairwise reduction: two independent partial sums, then the final sum.
%sum1 = fadd float %v1, %v2
193+
%sum2 = fadd float %v3, %v4
194+
%sum = fadd float %sum1, %sum2
195+
ret float %sum
196+
}
197+
128198
define i8 @ld_global_v8i8(ptr addrspace(1) %ptr) {
129199
; CHECK-LABEL: ld_global_v8i8(
130200
; CHECK: {

0 commit comments

Comments
 (0)