Skip to content

Commit cfe22cd

Browse files
committed
[AArch64][SVE] Add new ld<n> intrinsics that return a struct of vscale types
This will allow us to reuse the existing interleaved load logic in lowerInterleavedLoad, which currently exists for NEON types, for SVE fixed types as well. The eventual goal is to replace the existing ld<n> intrinsics with these, once a migration path has been sorted out. Differential Revision: https://reviews.llvm.org/D112078
1 parent 0bd6a9f commit cfe22cd

File tree

4 files changed

+944
-5
lines changed

4 files changed

+944
-5
lines changed

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -962,6 +962,25 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
962962
LLVMPointerToElt<0>],
963963
[IntrReadMem, IntrArgMemOnly]>;
964964

965+
// Predicated load intrinsic with two results: an overloaded vector type and a
// second result matching it. Operands are an i1 predicate tied to overloaded
// type 0 and a pointer to that type's element. Only reads argument memory.
class AdvSIMD_2Vec_PredLoad_Intrinsic
966+
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
967+
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
968+
LLVMPointerToElt<0>],
969+
[IntrReadMem, IntrArgMemOnly]>;
970+
971+
// Predicated load intrinsic with three results: an overloaded vector type and
// two further results matching it. Operands are an i1 predicate tied to
// overloaded type 0 and a pointer to that type's element. Only reads argument
// memory.
class AdvSIMD_3Vec_PredLoad_Intrinsic
972+
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
973+
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
974+
LLVMPointerToElt<0>],
975+
[IntrReadMem, IntrArgMemOnly]>;
976+
977+
// Predicated load intrinsic with four results: an overloaded vector type and
// three further results matching it. Operands are an i1 predicate tied to
// overloaded type 0 and a pointer to that type's element. Only reads argument
// memory.
class AdvSIMD_4Vec_PredLoad_Intrinsic
978+
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>,
979+
LLVMMatchType<0>],
980+
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
981+
LLVMPointerToElt<0>],
982+
[IntrReadMem, IntrArgMemOnly]>;
983+
965984
class AdvSIMD_1Vec_PredLoad_WriteFFR_Intrinsic
966985
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
967986
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
@@ -1535,6 +1554,10 @@ def int_aarch64_sve_ld2 : AdvSIMD_ManyVec_PredLoad_Intrinsic;
15351554
def int_aarch64_sve_ld3 : AdvSIMD_ManyVec_PredLoad_Intrinsic;
15361555
def int_aarch64_sve_ld4 : AdvSIMD_ManyVec_PredLoad_Intrinsic;
15371556

1557+
// "sret" forms of ld2/ld3/ld4: each is defined from the 2-, 3- or 4-result
// predicated-load class respectively.
def int_aarch64_sve_ld2_sret : AdvSIMD_2Vec_PredLoad_Intrinsic;
1558+
def int_aarch64_sve_ld3_sret : AdvSIMD_3Vec_PredLoad_Intrinsic;
1559+
def int_aarch64_sve_ld4_sret : AdvSIMD_4Vec_PredLoad_Intrinsic;
1560+
15381561
def int_aarch64_sve_ldnt1 : AdvSIMD_1Vec_PredLoad_Intrinsic;
15391562
def int_aarch64_sve_ldnf1 : AdvSIMD_1Vec_PredLoad_WriteFFR_Intrinsic;
15401563
def int_aarch64_sve_ldff1 : AdvSIMD_1Vec_PredLoad_WriteFFR_Intrinsic;

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 69 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,8 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
286286
void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
287287
void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
288288
void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
289-
unsigned Opc_rr, unsigned Opc_ri);
289+
unsigned Opc_rr, unsigned Opc_ri,
290+
bool IsIntr = false);
290291

291292
bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
292293
/// SVE Reg+Imm addressing mode.
@@ -1487,7 +1488,7 @@ AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
14871488

14881489
void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
14891490
unsigned Scale, unsigned Opc_ri,
1490-
unsigned Opc_rr) {
1491+
unsigned Opc_rr, bool IsIntr) {
14911492
assert(Scale < 4 && "Invalid scaling value.");
14921493
SDLoc DL(N);
14931494
EVT VT = N->getValueType(0);
@@ -1497,11 +1498,11 @@ void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
14971498
SDValue Base, Offset;
14981499
unsigned Opc;
14991500
std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
1500-
N, Opc_rr, Opc_ri, N->getOperand(2),
1501+
N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
15011502
CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
15021503

1503-
SDValue Ops[] = {N->getOperand(1), // Predicate
1504-
Base, // Memory operand
1504+
SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
1505+
Base, // Memory operand
15051506
Offset, Chain};
15061507

15071508
const EVT ResTys[] = {MVT::Untyped, MVT::Other};
@@ -3894,6 +3895,69 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
38943895
case Intrinsic::aarch64_ld64b:
38953896
SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
38963897
return;
3898+
case Intrinsic::aarch64_sve_ld2_sret: {
3899+
if (VT == MVT::nxv16i8) {
3900+
SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
3901+
true);
3902+
return;
3903+
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
3904+
(VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
3905+
SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
3906+
true);
3907+
return;
3908+
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
3909+
SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
3910+
true);
3911+
return;
3912+
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
3913+
SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
3914+
true);
3915+
return;
3916+
}
3917+
break;
3918+
}
3919+
case Intrinsic::aarch64_sve_ld3_sret: {
3920+
if (VT == MVT::nxv16i8) {
3921+
SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
3922+
true);
3923+
return;
3924+
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
3925+
(VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
3926+
SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
3927+
true);
3928+
return;
3929+
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
3930+
SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
3931+
true);
3932+
return;
3933+
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
3934+
SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
3935+
true);
3936+
return;
3937+
}
3938+
break;
3939+
}
3940+
case Intrinsic::aarch64_sve_ld4_sret: {
3941+
if (VT == MVT::nxv16i8) {
3942+
SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
3943+
true);
3944+
return;
3945+
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
3946+
(VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
3947+
SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
3948+
true);
3949+
return;
3950+
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
3951+
SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
3952+
true);
3953+
return;
3954+
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
3955+
SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
3956+
true);
3957+
return;
3958+
}
3959+
break;
3960+
}
38973961
}
38983962
} break;
38993963
case ISD::INTRINSIC_WO_CHAIN: {

0 commit comments

Comments
 (0)