Skip to content

Commit d264e73

Browse files
committed
Support EXTRACT_VECTOR_ELT on v2f32 with dynamic indices
1 parent 677be9a commit d264e73

File tree

2 files changed

+8
-18
lines changed

2 files changed

+8
-18
lines changed

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -618,6 +618,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
618618
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i8, Custom);
619619
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8, Custom);
620620

621+
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
622+
621623
// Custom conversions to/from v2i8.
622624
setOperationAction(ISD::BITCAST, MVT::v2i8, Custom);
623625

@@ -2251,7 +2253,8 @@ SDValue NVPTXTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
22512253
return Op;
22522254

22532255
// Extract individual elements and select one of them.
2254-
assert(Isv2x16VT(VectorVT) && "Unexpected vector type.");
2256+
assert((Isv2x16VT(VectorVT) || VectorVT == MVT::v2f32) &&
2257+
"Unexpected vector type.");
22552258
EVT EltVT = VectorVT.getVectorElementType();
22562259

22572260
SDLoc dl(Op.getNode());

llvm/test/CodeGen/NVPTX/f32x2-instructions.ll

Lines changed: 4 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -37,23 +37,10 @@ define float @test_extract_1(<2 x float> %a) #0 {
3737
; NOTE: this test used to be disabled because -O3 miscompiled it into pointer
3838
; arithmetic on test_extract_i_param_0 without first taking the symbol's
3939
; address (that is, moving it to a temporary).
40-
; define float @test_extract_i(<2 x float> %a, i64 %idx) #0 {
41-
; ; CHECK-LABEL: test_extract_i(
42-
; ; CHECK: {
43-
; ; CHECK-NEXT: .reg .pred %p<2>;
44-
; ; CHECK-NEXT: .reg .f32 %f<4>;
45-
; ; CHECK-NEXT: .reg .b64 %rd<2>;
46-
; ; CHECK-EMPTY:
47-
; ; CHECK-NEXT: // %bb.0:
48-
; ; CHECK-NEXT: ld.param.v2.f32 {%f1, %f2}, [test_extract_i_param_0];
49-
; ; CHECK-NEXT: ld.param.u64 %rd1, [test_extract_i_param_1];
50-
; ; CHECK-NEXT: setp.eq.s64 %p1, %rd1, 0;
51-
; ; CHECK-NEXT: selp.f32 %f3, %f1, %f2, %p1;
52-
; ; CHECK-NEXT: st.param.f32 [func_retval0], %f3;
53-
; ; CHECK-NEXT: ret;
54-
; %e = extractelement <2 x float> %a, i64 %idx
55-
; ret float %e
56-
; }
40+
define float @test_extract_i(<2 x float> %a, i64 %idx) #0 {
41+
%e = extractelement <2 x float> %a, i64 %idx
42+
ret float %e
43+
}
5744

5845
define <2 x float> @test_fadd(<2 x float> %a, <2 x float> %b) #0 {
5946
%r = fadd <2 x float> %a, %b

0 commit comments

Comments
 (0)