Skip to content

Commit 955d811

Browse files
committed
[NVPTX] use sink symbol for single-element unpacking of v2f32s
1 parent be3f076 commit 955d811

File tree

2 files changed

+17
-2
lines changed

2 files changed

+17
-2
lines changed

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2896,6 +2896,16 @@ let hasSideEffects = false in {
28962896
(ins Int64Regs:$s),
28972897
"{{ .reg .b32 tmp; mov.b64 {$low, tmp}, $s; }}",
28982898
[]>;
2899+
// Sink-symbol variant of the high-half unpack: splits the 64-bit register $s
// with a single mov.b64 and keeps only the second element ($high). The `_`
// placeholder takes the place of the unused first element, so no scratch
// `tmp` register has to be declared. Gated on PTX 7.1 via hasPTX<71>.
def I64toF32HS : NVPTXInst<(outs Float32Regs:$high),
2900+
(ins Int64Regs:$s),
2901+
"mov.b64 {{_, $high}}, $s;",
2902+
[]>,
2903+
Requires<[hasPTX<71>]>;
2904+
// Sink-symbol variant of the low-half unpack: splits the 64-bit register $s
// with a single mov.b64 and keeps only the first element ($low). The `_`
// placeholder takes the place of the unused second element, so no scratch
// `tmp` register has to be declared. Gated on PTX 7.1 via hasPTX<71>.
def I64toF32LS : NVPTXInst<(outs Float32Regs:$low),
2905+
(ins Int64Regs:$s),
2906+
"mov.b64 {{$low, _}}, $s;",
2907+
[]>,
2908+
Requires<[hasPTX<71>]>;
28992909

29002910
// PTX 7.1 lets you avoid a temp register and just use _ as a "sink" for the
29012911
// unused high/low part.
@@ -2939,6 +2949,11 @@ foreach vt = [v2f16, v2bf16, v2i16] in {
29392949
def : Pat<(extractelt vt:$src, 1), (I32toI16H $src)>;
29402950
}
29412951

2952+
// Select the sink-symbol instructions for single-element extraction from a
// v2f32 value: element 0 maps to I64toF32LS, element 1 to I64toF32HS. Both
// patterns are predicated on hasPTX<71>.
// NOTE(review): the unpredicated extractelt patterns that follow match the
// same DAG nodes; confirm these predicated ones are preferred when both apply.
def : Pat<(extractelt v2f32:$src, 0),
2953+
(I64toF32LS $src)>, Requires<[hasPTX<71>]>;
2954+
def : Pat<(extractelt v2f32:$src, 1),
2955+
(I64toF32HS $src)>, Requires<[hasPTX<71>]>;
2956+
29422957
def : Pat<(extractelt v2f32:$src, 0),
29432958
(I64toF32L $src)>;
29442959
def : Pat<(extractelt v2f32:$src, 1),

llvm/test/CodeGen/NVPTX/f32x2-instructions.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ define float @test_extract_0(<2 x float> %a) #0 {
3333
; CHECK-EMPTY:
3434
; CHECK-NEXT: // %bb.0:
3535
; CHECK-NEXT: ld.param.b64 %rd1, [test_extract_0_param_0];
36-
; CHECK-NEXT: { .reg .b32 tmp; mov.b64 {%f1, tmp}, %rd1; }
36+
; CHECK-NEXT: mov.b64 {%f1, _}, %rd1;
3737
; CHECK-NEXT: st.param.f32 [func_retval0], %f1;
3838
; CHECK-NEXT: ret;
3939
%e = extractelement <2 x float> %a, i32 0
@@ -48,7 +48,7 @@ define float @test_extract_1(<2 x float> %a) #0 {
4848
; CHECK-EMPTY:
4949
; CHECK-NEXT: // %bb.0:
5050
; CHECK-NEXT: ld.param.b64 %rd1, [test_extract_1_param_0];
51-
; CHECK-NEXT: { .reg .b32 tmp; mov.b64 {tmp, %f1}, %rd1; }
51+
; CHECK-NEXT: mov.b64 {_, %f1}, %rd1;
5252
; CHECK-NEXT: st.param.f32 [func_retval0], %f1;
5353
; CHECK-NEXT: ret;
5454
%e = extractelement <2 x float> %a, i32 1

0 commit comments

Comments
 (0)