[NVPTX] Support EXTRACT_VECTOR_ELT on v2f32 with dynamic indices

Prince781 · Prince781 · commit d264e7340e3f · 2025-03-14T19:00:27.000-07:00
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -618,6 +618,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
   setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i8, Custom);
   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8, Custom);
 
+  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
+
   // Custom conversions to/from v2i8.
   setOperationAction(ISD::BITCAST, MVT::v2i8, Custom);
 
@@ -2251,7 +2253,8 @@ SDValue NVPTXTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
     return Op;
 
   // Extract individual elements and select one of them.
-  assert(Isv2x16VT(VectorVT) && "Unexpected vector type.");
+  assert((Isv2x16VT(VectorVT) || VectorVT == MVT::v2f32) &&
+         "Unexpected vector type.");
   EVT EltVT = VectorVT.getVectorElementType();
 
   SDLoc dl(Op.getNode());
diff --git a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll
@@ -37,23 +37,10 @@ define float @test_extract_1(<2 x float> %a) #0 {
 ; NOTE: disabled as -O3 miscompiles this into pointer arithmetic on
 ; test_extract_i_param_0 where the symbol's address is not taken first (that
 ; is, moved to a temporary)
-; define float @test_extract_i(<2 x float> %a, i64 %idx) #0 {
-; ; CHECK-LABEL: test_extract_i(
-; ; CHECK:       {
-; ; CHECK-NEXT:    .reg .pred %p<2>;
-; ; CHECK-NEXT:    .reg .f32 %f<4>;
-; ; CHECK-NEXT:    .reg .b64 %rd<2>;
-; ; CHECK-EMPTY:
-; ; CHECK-NEXT:  // %bb.0:
-; ; CHECK-NEXT:    ld.param.v2.f32 {%f1, %f2}, [test_extract_i_param_0];
-; ; CHECK-NEXT:    ld.param.u64 %rd1, [test_extract_i_param_1];
-; ; CHECK-NEXT:    setp.eq.s64 %p1, %rd1, 0;
-; ; CHECK-NEXT:    selp.f32 %f3, %f1, %f2, %p1;
-; ; CHECK-NEXT:    st.param.f32 [func_retval0], %f3;
-; ; CHECK-NEXT:    ret;
-;   %e = extractelement <2 x float> %a, i64 %idx
-;   ret float %e
-; }
+define float @test_extract_i(<2 x float> %a, i64 %idx) #0 {
+  %e = extractelement <2 x float> %a, i64 %idx
+  ret float %e
+}
 
 define <2 x float> @test_fadd(<2 x float> %a, <2 x float> %b) #0 {
   %r = fadd <2 x float> %a, %b