Skip to content

Commit 543c7b7

Browse files
committed
[NVPTX] use sink symbol for single-element unpacking of v2f32s
1 parent 495c516 commit 543c7b7

File tree

2 files changed

+17
-3
lines changed

2 files changed

+17
-3
lines changed

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2861,9 +2861,18 @@ let hasSideEffects = false in {
28612861
(ins Int64Regs:$s),
28622862
"{{ .reg .b32 tmp; mov.b64 {$low, tmp}, $s; }}",
28632863
[]>;
2864-
28652864
// PTX 7.1 lets you avoid a temp register and just use _ as a "sink" for the
28662865
// unused high/low part.
2866+
def I64toF32H_Sink : NVPTXInst<(outs Float32Regs:$high),
2867+
(ins Int64Regs:$s),
2868+
"mov.b64 {{_, $high}}, $s;",
2869+
[]>,
2870+
Requires<[hasPTX<71>]>;
2871+
def I64toF32L_Sink : NVPTXInst<(outs Float32Regs:$low),
2872+
(ins Int64Regs:$s),
2873+
"mov.b64 {{$low, _}}, $s;",
2874+
[]>,
2875+
Requires<[hasPTX<71>]>;
28672876
def I32toI16H_Sink : NVPTXInst<(outs Int16Regs:$high),
28682877
(ins Int32Regs:$s),
28692878
"mov.b32 \t{{_, $high}}, $s;",
@@ -2904,6 +2913,11 @@ foreach vt = [v2f16, v2bf16, v2i16] in {
29042913
def : Pat<(extractelt vt:$src, 1), (I32toI16H $src)>;
29052914
}
29062915

2916+
def : Pat<(extractelt v2f32:$src, 0),
2917+
(I64toF32L_Sink $src)>, Requires<[hasPTX<71>]>;
2918+
def : Pat<(extractelt v2f32:$src, 1),
2919+
(I64toF32H_Sink $src)>, Requires<[hasPTX<71>]>;
2920+
29072921
def : Pat<(extractelt v2f32:$src, 0),
29082922
(I64toF32L $src)>;
29092923
def : Pat<(extractelt v2f32:$src, 1),

llvm/test/CodeGen/NVPTX/f32x2-instructions.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ define float @test_extract_0(<2 x float> %a) #0 {
3333
; CHECK-EMPTY:
3434
; CHECK-NEXT: // %bb.0:
3535
; CHECK-NEXT: ld.param.b64 %rd1, [test_extract_0_param_0];
36-
; CHECK-NEXT: { .reg .b32 tmp; mov.b64 {%f1, tmp}, %rd1; }
36+
; CHECK-NEXT: mov.b64 {%f1, _}, %rd1;
3737
; CHECK-NEXT: st.param.b32 [func_retval0], %f1;
3838
; CHECK-NEXT: ret;
3939
%e = extractelement <2 x float> %a, i32 0
@@ -48,7 +48,7 @@ define float @test_extract_1(<2 x float> %a) #0 {
4848
; CHECK-EMPTY:
4949
; CHECK-NEXT: // %bb.0:
5050
; CHECK-NEXT: ld.param.b64 %rd1, [test_extract_1_param_0];
51-
; CHECK-NEXT: { .reg .b32 tmp; mov.b64 {tmp, %f1}, %rd1; }
51+
; CHECK-NEXT: mov.b64 {_, %f1}, %rd1;
5252
; CHECK-NEXT: st.param.b32 [func_retval0], %f1;
5353
; CHECK-NEXT: ret;
5454
%e = extractelement <2 x float> %a, i32 1

0 commit comments

Comments
 (0)