[RISCV] Add codegen support for ri.vinsert.v.x and ri.vextract.x.v (llvm#136708)

preames · IanWood1 · commit 0bdb85a613a5 · 2025-05-06T08:45:07.000-07:00
These instructions are included in XRivosVisni. They perform a scalar
insert into a vector (with a potentially non-zero index) and a scalar
extract from a vector (with a potentially non-zero index) respectively.
They're very analogous to vmv.s.x and vmv.x.s respectively.

The instructions do have a couple of restrictions:
1) Only constant indices are supported, encoded as a uimm5 immediate (0-31).
2) There are no FP variants.

One important property of these instructions is that their throughput
and latency are expected to be LMUL independent.
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -6963,7 +6963,7 @@ static bool hasPassthruOp(unsigned Opcode) {
          Opcode <= RISCVISD::LAST_STRICTFP_OPCODE &&
          "not a RISC-V target specific op");
   static_assert(
-      RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 133 &&
+      RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 134 &&
       RISCVISD::LAST_STRICTFP_OPCODE - RISCVISD::FIRST_STRICTFP_OPCODE == 21 &&
       "adding target specific op should update this function");
   if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
@@ -6987,7 +6987,7 @@ static bool hasMaskOp(unsigned Opcode) {
          Opcode <= RISCVISD::LAST_STRICTFP_OPCODE &&
          "not a RISC-V target specific op");
   static_assert(
-      RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 133 &&
+      RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 134 &&
       RISCVISD::LAST_STRICTFP_OPCODE - RISCVISD::FIRST_STRICTFP_OPCODE == 21 &&
       "adding target specific op should update this function");
   if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
@@ -9595,6 +9595,13 @@ getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
   return SmallerVT;
 }
 
+static bool isValidVisniInsertExtractIndex(SDValue Idx) {
+  auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
+  if (!IdxC || isNullConstant(Idx))
+    return false;
+  return isUInt<5>(IdxC->getZExtValue());
+}
+
 // Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
 // first position of a vector, and that vector is slid up to the insert index.
 // By limiting the active vector length to index+1 and merging with the
@@ -9705,6 +9712,23 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
         return Vec;
       return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
     }
+
+    // Use ri.vinsert.v.x if available.
+    if (Subtarget.hasVendorXRivosVisni() && VecVT.isInteger() &&
+        isValidVisniInsertExtractIndex(Idx)) {
+      // Tail policy applies to elements past VLMAX (by assumption Idx < VLMAX)
+      SDValue PolicyOp =
+          DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
+      Vec = DAG.getNode(RISCVISD::RI_VINSERT_VL, DL, ContainerVT, Vec, Val, Idx,
+                        VL, PolicyOp);
+      if (AlignedIdx)
+        Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
+                          Vec, AlignedIdx);
+      if (!VecVT.isFixedLengthVector())
+        return Vec;
+      return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
+    }
+
     ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
   } else {
     // On RV32, i64-element vectors must be specially handled to place the
@@ -9904,6 +9928,14 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
     }
   }
 
+  // Use ri.vextract.x.v if available.
+  // Index 0 is rejected by isValidVisniInsertExtractIndex (prefer vmv.x.s).
+  if (Subtarget.hasVendorXRivosVisni() && EltVT.isInteger() &&
+      isValidVisniInsertExtractIndex(Idx)) {
+    SDValue Elt = DAG.getNode(RISCVISD::RI_VEXTRACT, DL, XLenVT, Vec, Idx);
+    return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt);
+  }
+
   // If after narrowing, the required slide is still greater than LMUL2,
   // fallback to generic expansion and go through the stack.  This is done
   // for a subtle reason: extracting *all* elements out of a vector is
@@ -22321,12 +22353,14 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(VZEXT_VL)
   NODE_NAME_CASE(VCPOP_VL)
   NODE_NAME_CASE(VFIRST_VL)
+  NODE_NAME_CASE(RI_VINSERT_VL)
   NODE_NAME_CASE(RI_VZIPEVEN_VL)
   NODE_NAME_CASE(RI_VZIPODD_VL)
   NODE_NAME_CASE(RI_VZIP2A_VL)
   NODE_NAME_CASE(RI_VZIP2B_VL)
   NODE_NAME_CASE(RI_VUNZIP2A_VL)
   NODE_NAME_CASE(RI_VUNZIP2B_VL)
+  NODE_NAME_CASE(RI_VEXTRACT)
   NODE_NAME_CASE(READ_CSR)
   NODE_NAME_CASE(WRITE_CSR)
   NODE_NAME_CASE(SWAP_CSR)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -404,6 +404,10 @@ enum NodeType : unsigned {
   //  vfirst.m with additional mask and VL operands.
   VFIRST_VL,
 
+  // XRivosVisni
+  // RI_VINSERT_VL matches the semantics of ri.vinsert.v.x. It carries a VL operand.
+  RI_VINSERT_VL,
+
   // XRivosVizip
   RI_VZIPEVEN_VL,
   RI_VZIPODD_VL,
@@ -414,6 +418,12 @@ enum NodeType : unsigned {
 
   LAST_VL_VECTOR_OP = RI_VUNZIP2B_VL,
 
+  // XRivosVisni
+  // RI_VEXTRACT matches the semantics of ri.vextract.x.v. The result is always
+  // XLenVT sign extended from the vector element size.  RI_VEXTRACT does *not*
+  // have a VL operand.
+  RI_VEXTRACT,
+
   // Read VLENB CSR
   READ_VLENB,
   // Reads value of CSR.
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -94,6 +94,10 @@ static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr &MI) {
   }
 }
 
+static bool isVExtractInstr(const MachineInstr &MI) {
+  return RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::RI_VEXTRACT;
+}
+
 static bool isScalarExtractInstr(const MachineInstr &MI) {
   switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
   default:
@@ -538,6 +542,12 @@ DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
     Res.MaskPolicy = false;
   }
 
+  if (isVExtractInstr(MI)) {
+    assert(!RISCVII::hasVLOp(TSFlags));
+    // TODO: LMUL can be any larger value (without cost)
+    Res.TailPolicy = false;
+  }
+
   return Res;
 }
 
@@ -1085,7 +1095,7 @@ RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const {
       InstrInfo.setAVLRegDef(VNI, VLOp.getReg());
     }
   } else {
-    assert(isScalarExtractInstr(MI));
+    assert(isScalarExtractInstr(MI) || isVExtractInstr(MI));
     // Pick a random value for state tracking purposes, will be ignored via
     // the demanded fields mechanism
     InstrInfo.setAVLImm(1);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td
@@ -128,3 +128,57 @@ def RI_VEXTRACT : CustomRivosXVI<0b010111, OPMVV, (outs GPR:$rd),
                                 (ins VR:$vs2, uimm5:$imm),
                                 "ri.vextract.x.v", "$rd, $vs2, $imm">;
 }
+
+
+def ri_vextract : SDNode<"RISCVISD::RI_VEXTRACT",
+                         SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<1>,
+                                              SDTCisInt<2>,
+                                              SDTCisInt<1>]>>;
+
+def ri_vinsert_vl : SDNode<"RISCVISD::RI_VINSERT_VL",
+                           SDTypeProfile<1, 5, [SDTCisSameAs<0, 1>,
+                                                SDTCisInt<0>,
+                                                SDTCisVT<2, XLenVT>,
+                                                SDTCisVT<3, XLenVT>,
+                                                SDTCisVT<4, XLenVT>]>>;
+
+let Predicates = [HasVendorXRivosVisni], mayLoad = 0, mayStore = 0,
+    hasSideEffects = 0, HasSEWOp = 1 in
+foreach m = MxList in {
+  defvar mx = m.MX;
+  let VLMul = m.value in {
+    let BaseInstr = RI_VEXTRACT in
+    def PseudoRI_VEXTRACT_  # mx :
+      Pseudo<(outs GPR:$rd), (ins m.vrclass:$rs2, uimm5:$idx, ixlenimm:$sew),
+             []>,
+      RISCVVPseudo;
+
+    let HasVLOp = 1, BaseInstr = RI_VINSERT, HasVecPolicyOp = 1,
+        Constraints = "$rd = $rs1" in
+    def PseudoRI_VINSERT_ # mx :
+      Pseudo<(outs m.vrclass:$rd),
+             (ins m.vrclass:$rs1, GPR:$rs2, uimm5:$idx, AVL:$vl,
+                  ixlenimm:$sew, ixlenimm:$policy),
+             []>,
+      RISCVVPseudo;
+  }
+}
+
+
+
+foreach vti = AllIntegerVectors in
+  let Predicates = GetVTypePredicates<vti>.Predicates in {
+    def : Pat<(XLenVT (ri_vextract (vti.Vector vti.RegClass:$vs2), uimm5:$imm)),
+              (!cast<Instruction>("PseudoRI_VEXTRACT_" # vti.LMul.MX)
+               $vs2, uimm5:$imm, vti.Log2SEW)>;
+
+    def : Pat<(vti.Vector (ri_vinsert_vl (vti.Vector vti.RegClass:$merge),
+                                          vti.ScalarRegClass:$rs1,
+                                          uimm5:$imm,
+                                          VLOpFrag,
+                                          (XLenVT timm:$policy))),
+              (!cast<Instruction>("PseudoRI_VINSERT_" # vti.LMul.MX)
+               $merge, vti.ScalarRegClass:$rs1, uimm5:$imm,
+               GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>;
+
+  }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll

Original file line number	Diff line number	Diff line change
`@@ -94,6 +94,10 @@ static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr &MI) {`
`94`	`94`	`}`
`95`	`95`	`}`
`96`	`96`
	`97`	`+static bool isVExtractInstr(const MachineInstr &MI) {`
	`98`	`+ return RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::RI_VEXTRACT;`
	`99`	`+}`
	`100`	`+`
`97`	`101`	`static bool isScalarExtractInstr(const MachineInstr &MI) {`
`98`	`102`	`switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {`
`99`	`103`	`default:`
`@@ -538,6 +542,12 @@ DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {`
`538`	`542`	`Res.MaskPolicy = false;`
`539`	`543`	`}`
`540`	`544`
	`545`	`+ if (isVExtractInstr(MI)) {`
	`546`	`+ assert(!RISCVII::hasVLOp(TSFlags));`
	`547`	`+ // TODO: LMUL can be any larger value (without cost)`
	`548`	`+ Res.TailPolicy = false;`
	`549`	`+ }`
	`550`	`+`
`541`	`551`	`return Res;`
`542`	`552`	`}`
`543`	`553`
`@@ -1085,7 +1095,7 @@ RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const {`
`1085`	`1095`	`InstrInfo.setAVLRegDef(VNI, VLOp.getReg());`
`1086`	`1096`	`}`
`1087`	`1097`	`} else {`
`1088`		`- assert(isScalarExtractInstr(MI));`
	`1098`	`+ assert(isScalarExtractInstr(MI) || isVExtractInstr(MI));`
`1089`	`1099`	`// Pick a random value for state tracking purposes, will be ignored via`
`1090`	`1100`	`// the demanded fields mechanism`
`1091`	`1101`	`InstrInfo.setAVLImm(1);`