Skip to content

Commit ad79e84

Browse files
committed
[NVPTX] use sink symbol for single-element unpacking of v2f32s
1 parent ea79229 commit ad79e84

File tree

2 files changed

+17
-2
lines changed

2 files changed

+17
-2
lines changed

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3065,6 +3065,16 @@ let hasSideEffects = false in {
30653065
(ins Int64Regs:$s),
30663066
"{{ .reg .b32 tmp; mov.b64 {$low, tmp}, $s; }}",
30673067
[]>;
3068+
def I64toF32HS : NVPTXInst<(outs Float32Regs:$high), // "sink" variant: produces only the $high f32 result from the 64-bit source
3069+
(ins Int64Regs:$s), // single input: the 64-bit register to unpack
3070+
"mov.b64 {{_, $high}}, $s;", // `{{`/`}}` print literal braces; `_` takes the low slot, so no scratch `tmp` .reg is declared
3071+
[]>, // empty pattern list: nothing is matched from this def itself
3072+
Requires<[hasPTX<71>]>; // gated on hasPTX<71> (presumably PTX ISA 7.1 or newer -- confirm)
3073+
def I64toF32LS : NVPTXInst<(outs Float32Regs:$low), // "sink" variant: produces only the $low f32 result from the 64-bit source
3074+
(ins Int64Regs:$s), // single input: the 64-bit register to unpack
3075+
"mov.b64 {{$low, _}}, $s;", // `{{`/`}}` print literal braces; `_` takes the high slot, so no scratch `tmp` .reg is declared
3076+
[]>, // empty pattern list: nothing is matched from this def itself
3077+
Requires<[hasPTX<71>]>; // gated on hasPTX<71> (presumably PTX ISA 7.1 or newer -- confirm)
30683078

30693079
}
30703080

@@ -3089,6 +3099,11 @@ def : Pat<(extractelt vt:$src, 1),
30893099
(I32toI16H $src)>;
30903100
}
30913101

3102+
def : Pat<(extractelt v2f32:$src, 0), // extracting element 0 of a v2f32 value ...
3103+
(I64toF32LS $src)>, Requires<[hasPTX<71>]>; // ... selects the low-half sink unpack; NOTE(review): the unpredicated I64toF32L pattern that follows presumably remains the fallback when hasPTX<71> is false -- confirm
3104+
def : Pat<(extractelt v2f32:$src, 1), // extracting element 1 of a v2f32 value ...
3105+
(I64toF32HS $src)>, Requires<[hasPTX<71>]>; // ... selects the high-half sink unpack, under the same hasPTX<71> predicate as the instruction def
3106+
30923107
def : Pat<(extractelt v2f32:$src, 0),
30933108
(I64toF32L $src)>;
30943109
def : Pat<(extractelt v2f32:$src, 1),

llvm/test/CodeGen/NVPTX/f32x2-instructions.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ define float @test_extract_0(<2 x float> %a) #0 {
3333
; CHECK-EMPTY:
3434
; CHECK-NEXT: // %bb.0:
3535
; CHECK-NEXT: ld.param.b64 %rd1, [test_extract_0_param_0];
36-
; CHECK-NEXT: { .reg .b32 tmp; mov.b64 {%f1, tmp}, %rd1; }
36+
; CHECK-NEXT: mov.b64 {%f1, _}, %rd1;
3737
; CHECK-NEXT: st.param.f32 [func_retval0], %f1;
3838
; CHECK-NEXT: ret;
3939
%e = extractelement <2 x float> %a, i32 0
@@ -48,7 +48,7 @@ define float @test_extract_1(<2 x float> %a) #0 {
4848
; CHECK-EMPTY:
4949
; CHECK-NEXT: // %bb.0:
5050
; CHECK-NEXT: ld.param.b64 %rd1, [test_extract_1_param_0];
51-
; CHECK-NEXT: { .reg .b32 tmp; mov.b64 {tmp, %f1}, %rd1; }
51+
; CHECK-NEXT: mov.b64 {_, %f1}, %rd1;
5252
; CHECK-NEXT: st.param.f32 [func_retval0], %f1;
5353
; CHECK-NEXT: ret;
5454
%e = extractelement <2 x float> %a, i32 1

0 commit comments

Comments
 (0)