Skip to content

Commit c7f74b8

Browse files
committed
[NVPTX] use sink symbol for single-element unpacking of v2f32s
1 parent 495c516 commit c7f74b8

File tree

2 files changed

+17
-2
lines changed

2 files changed

+17
-2
lines changed

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2861,6 +2861,16 @@ let hasSideEffects = false in {
28612861
(ins Int64Regs:$s),
28622862
"{{ .reg .b32 tmp; mov.b64 {$low, tmp}, $s; }}",
28632863
[]>;
2864+
def I64toF32HS : NVPTXInst<(outs Float32Regs:$high),
2865+
(ins Int64Regs:$s),
2866+
"mov.b64 {{_, $high}}, $s;",
2867+
[]>,
2868+
Requires<[hasPTX<71>]>;
2869+
def I64toF32LS : NVPTXInst<(outs Float32Regs:$low),
2870+
(ins Int64Regs:$s),
2871+
"mov.b64 {{$low, _}}, $s;",
2872+
[]>,
2873+
Requires<[hasPTX<71>]>;
28642874

28652875
// PTX 7.1 lets you avoid a temp register and just use _ as a "sink" for the
28662876
// unused high/low part.
@@ -2904,6 +2914,11 @@ foreach vt = [v2f16, v2bf16, v2i16] in {
29042914
def : Pat<(extractelt vt:$src, 1), (I32toI16H $src)>;
29052915
}
29062916

2917+
def : Pat<(extractelt v2f32:$src, 0),
2918+
(I64toF32LS $src)>, Requires<[hasPTX<71>]>;
2919+
def : Pat<(extractelt v2f32:$src, 1),
2920+
(I64toF32HS $src)>, Requires<[hasPTX<71>]>;
2921+
29072922
def : Pat<(extractelt v2f32:$src, 0),
29082923
(I64toF32L $src)>;
29092924
def : Pat<(extractelt v2f32:$src, 1),

llvm/test/CodeGen/NVPTX/f32x2-instructions.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ define float @test_extract_0(<2 x float> %a) #0 {
3333
; CHECK-EMPTY:
3434
; CHECK-NEXT: // %bb.0:
3535
; CHECK-NEXT: ld.param.b64 %rd1, [test_extract_0_param_0];
36-
; CHECK-NEXT: { .reg .b32 tmp; mov.b64 {%f1, tmp}, %rd1; }
36+
; CHECK-NEXT: mov.b64 {%f1, _}, %rd1;
3737
; CHECK-NEXT: st.param.b32 [func_retval0], %f1;
3838
; CHECK-NEXT: ret;
3939
%e = extractelement <2 x float> %a, i32 0
@@ -48,7 +48,7 @@ define float @test_extract_1(<2 x float> %a) #0 {
4848
; CHECK-EMPTY:
4949
; CHECK-NEXT: // %bb.0:
5050
; CHECK-NEXT: ld.param.b64 %rd1, [test_extract_1_param_0];
51-
; CHECK-NEXT: { .reg .b32 tmp; mov.b64 {tmp, %f1}, %rd1; }
51+
; CHECK-NEXT: mov.b64 {_, %f1}, %rd1;
5252
; CHECK-NEXT: st.param.b32 [func_retval0], %f1;
5353
; CHECK-NEXT: ret;
5454
%e = extractelement <2 x float> %a, i32 1

0 commit comments

Comments
 (0)